/home/hs/.local/share/Steam/steam.sh: line 161: VERSION_ID: unbound variable /home/hs/.local/share/Steam/steam.sh: line 161: VERSION_ID: unbound variable Running Steam on arch 64-bit /home/hs/.local/share/Steam/steam.sh: line 161: VERSION_ID: unbound variable STEAM_RUNTIME is enabled automatically Installing breakpad exception handler for appid(steam)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element Fontconfig warning: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 78: saw unknown, expected number [0428/202513:ERROR:browser_main_loop.cc(170)] Running without the SUID sandbox! See https://code.google.com/p/chromium/wiki/LinuxSUIDSandboxDevelopment for more information on developing with the sandbox on. Installing breakpad exception handler for appid(steamwebhelper)/version(20150428164456) Installing breakpad exception handler for appid(steamwebhelper)/version(1430239496) Installing breakpad exception handler for appid(steamwebhelper)/version(20150428164456) Installing breakpad exception handler for appid(steamwebhelper)/version(1430266383) Installing breakpad exception handler for appid(steamwebhelper)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) FillInMachineIDInfo took a total of 0 milliseconds Installing breakpad exception handler for appid(steam)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) ** (steam:1656): WARNING **: Unknown device type 14 ** (steam:1656): WARNING **: Could not create object for /org/freedesktop/NetworkManager/Devices/0: unknown object type Installing breakpad exception handler for appid(steam)/version(1430266383) [2015-04-28 20:25:11] Startup - updater built Apr 28 2015 16:45:07 [2015-04-28 20:25:11] Opted in to client beta 'publicbeta' via beta file You are in the 'publicbeta' client beta. Looks like steam didn't shutdown cleanly, scheduling immediate update check [2015-04-28 20:25:11] Checking for update on startup [2015-04-28 20:25:11] Checking for available updates... [2015-04-28 20:25:12] Download skipped by HTTP 304 Not Modified [2015-04-28 20:25:12] Nothing to do [2015-04-28 20:25:12] Verifying installation... [2015-04-28 20:25:12] Performing checksum verification of executable files [2015-04-28 20:25:12] Verification complete VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 0, 0, [m0] ; C80A0002 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 ** (steam:1656): WARNING **: Ignoring invalid property 'secondaries' ** (steam:1656): WARNING **: Ignoring invalid property 'route-data' ** (steam:1656): WARNING **: Ignoring invalid property 'address-data' ** (steam:1656): WARNING **: Ignoring invalid property 'route-data' ** (steam:1656): WARNING **: Ignoring invalid property 'address-data' VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV OUT[1], IN[1] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %33, %13 %46 = fmul float %33, %14 %47 = fmul float %33, %15 %48 = fmul float %33, %16 %49 = fmul float %34, %17 %50 = fadd float %49, %45 %51 = fmul float %34, %18 %52 = fadd float %51, %46 %53 = fmul float %34, %19 %54 = fadd float %53, %47 %55 = fmul float %34, %20 %56 = fadd float %55, %48 %57 = fmul float %35, %21 %58 = fadd float %57, %50 %59 = fmul float %35, %22 %60 = fadd float %59, %52 %61 = fmul float %35, %23 %62 = fadd float %61, %54 %63 = fmul float %35, %24 %64 = fadd float %63, %56 %65 = fmul float %36, %25 %66 = fadd float %65, %58 %67 = fmul float %36, %26 %68 = fadd float %67, %60 %69 = fmul float %36, %27 %70 = fadd float %69, %62 %71 = fmul float %36, %28 %72 = fadd float %71, %64 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %66, float %68, float %70, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v1, s4, v4 ; D2820004 04100901 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s4, v5 ; D2820005 04140901 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v0 ; 100C0004 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s4, v6 ; D2820006 04180901 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v0 ; 100E0004 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v1, s4, v7 ; D2820007 041C0901 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[1..4] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: MOV TEMP[0].w, IN[0].wwww 2: TXP TEMP[0], TEMP[0], SAMP[0], 2D 3: MUL TEMP[0], TEMP[0], CONST[4] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %28 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %29 = load <8 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %31 = load <4 x i32> addrspace(2)* %30, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %35 = fdiv float %32, %34 %36 = fdiv float %33, %34 %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = bitcast <8 x i32> %29 to <32 x i8> %42 = bitcast <4 x i32> %31 to <16 x i8> %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %41, <16 x i8> %42, i32 2) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = fmul float %44, %24 %49 = fmul float %45, %25 %50 = fmul float %46, %26 %51 = fmul float %47, %27 %52 = call i32 @llvm.SI.packf16(float %48, float %49) %53 = bitcast i32 %52 to float %54 = call i32 @llvm.SI.packf16(float %50, float %51) %55 = bitcast i32 %54 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %53, float %55, float %53, float %55) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_mov_b32_e32 v3, 0x6f800000 ; 7E0602FF 6F800000 v_cmp_gt_f32_e64 s[0:1], |v2|, v3 ; D0080100 00020702 v_mov_b32_e32 v3, 0x2f800000 ; 7E0602FF 2F800000 v_cndmask_b32_e64 v3, 1.0, v3, s[0:1] ; D2000803 100206F2 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_rcp_f32_e32 v2, v2 ; 7E045502 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_mul_f32_e32 v4, v2, v4 ; 10080902 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_mul_f32_e32 v4, v0, v3 ; 10080700 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800F00 00430004 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v3 ; 10080604 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v2 ; 100A0404 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s0, s[0:3], 0x10 ; C2000110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s0, v0 ; 10000000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 Generating new string page texture 2: 48x256, total string texture memory is 49.15 KB VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV_SAT OUT[1], IN[1] 5: MOV OUT[2], IN[2] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %33, %13 %54 = fmul float %33, %14 %55 = fmul float %33, %15 %56 = fmul float %33, %16 %57 = fmul float %34, %17 %58 = fadd float %57, %53 %59 = fmul float %34, %18 %60 = fadd float %59, %54 %61 = fmul float %34, %19 %62 = fadd float %61, %55 %63 = fmul float %34, %20 %64 = fadd float %63, %56 %65 = fmul float %35, %21 %66 = fadd float %65, %58 %67 = fmul float %35, %22 %68 = fadd float %67, %60 %69 = fmul float %35, %23 %70 = fadd float %69, %62 %71 = fmul float %35, %24 %72 = fadd float %71, %64 %73 = fmul float %36, %25 %74 = fadd float %73, %66 %75 = fmul float %36, %26 %76 = fadd float %75, %68 %77 = fmul float %36, %27 %78 = fadd float %77, %70 %79 = fmul float %36, %28 %80 = fadd float %79, %72 %81 = call float @llvm.AMDIL.clamp.(float %41, float 0.000000e+00, float 1.000000e+00) %82 = call float @llvm.AMDIL.clamp.(float %42, float 0.000000e+00, float 1.000000e+00) %83 = call float @llvm.AMDIL.clamp.(float %43, float 0.000000e+00, float 1.000000e+00) %84 = call float @llvm.AMDIL.clamp.(float %44, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %81, float %82, float %83, float %84) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %74, float %76, float %78, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e64 v5, 0, v4 clamp ; D2060805 00020880 v_add_f32_e64 v6, 0, v3 clamp ; D2060806 00020680 v_add_f32_e64 v7, 0, v2 clamp ; D2060807 00020480 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 exp 15, 32, 0, 0, 0, v1, v7, v6, v5 ; F800020F 05060701 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v1, s4, v4 ; D2820004 04100901 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s4, v5 ; D2820005 04140901 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v0 ; 100C0004 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s4, v6 ; D2820006 04180901 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v0 ; 100E0004 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v1, s4, v7 ; D2820007 041C0901 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: MOV TEMP[0].w, IN[1].wwww 2: TXP TEMP[0], TEMP[0], SAMP[0], 2D 3: MUL TEMP[0], TEMP[0], IN[0] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %33 = fdiv float %30, %32 %34 = fdiv float %31, %32 %35 = bitcast float %33 to i32 %36 = bitcast float %34 to i32 %37 = insertelement <2 x i32> undef, i32 %35, i32 0 %38 = insertelement <2 x i32> %37, i32 %36, i32 1 %39 = bitcast <8 x i32> %23 to <32 x i8> %40 = bitcast <4 x i32> %25 to <16 x i8> %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %39, <16 x i8> %40, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %26 %47 = fmul float %43, %27 %48 = fmul float %44, %28 %49 = fmul float %45, %29 %50 = call i32 @llvm.SI.packf16(float %46, float %47) %51 = bitcast i32 %50 to float %52 = call i32 @llvm.SI.packf16(float %48, float %49) %53 = bitcast i32 %52 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %51, float %53, float %51, float %53) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700 v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701 v_mov_b32_e32 v3, 0x6f800000 ; 7E0602FF 6F800000 v_cmp_gt_f32_e64 s[0:1], |v2|, v3 ; D0080100 00020702 v_mov_b32_e32 v3, 0x2f800000 ; 7E0602FF 2F800000 v_cndmask_b32_e64 v3, 1.0, v3, s[0:1] ; D2000803 100206F2 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_rcp_f32_e32 v2, v2 ; 7E045502 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_mul_f32_e32 v4, v2, v4 ; 10080902 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_mul_f32_e32 v2, v2, v6 ; 10040D02 v_mul_f32_e32 v4, v2, v3 ; 10080702 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010204 v_interp_p1_f32 v6, v0, 3, 0, [m0] ; C8180300 v_interp_p2_f32 v6, [v6], v1, 3, 0, [m0] ; C8190301 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v6, v6, v5 ; 100C0B06 v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200 v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 v_mul_f32_e32 v7, v7, v4 ; 100E0907 v_cvt_pkrtz_f16_f32_e32 v6, v7, v6 ; 5E0C0D07 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_mul_f32_e32 v7, v7, v3 ; 100E0707 v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_mul_f32_e32 v0, v8, v2 ; 10000508 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 Generating new string page texture 3: 256x256, total string texture memory is 311.30 KB FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 0, 0, [m0] ; C80A0002 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 Installing breakpad exception handler for appid(steam)/version(1430266383) Installing breakpad exception handler for appid(steam)/version(1430266383) Adding licenses for the following package(s): 0, 19, 25, 30, 32, 40, 79, 218, 515, 516, 700, 1579, 1774, 2487, 3083, 6208, 6970, 7877, 8041, 8186, 12225, 12361, 12456, 12517, 13054, 13410, 15190, 16223, 16531, 16549, 16699, 17915, 18265, 18613, 26375, 26390, 26595, 27644, 27835, 27888, 28596, 29856, 30669, 31797, 35183, 62892, 62894, 62922, 62923, 62924, 62925, 62926, 62927, 62935, 63065, 63341, 63683, 64229, 65525, 66253 roaming config store loaded successfully - 3080 bytes. migrating temporary roaming config store Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element Fontconfig warning: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 78: saw unknown, expected number Installing breakpad exception handler for appid(steam)/version(1430266383) Failed to init SteamVR because it isn't installed ExecCommandLine: ""/home/hs/.local/share/Steam/ubuntu12_32/steam" "steam://rungameid/570" " ExecSteamURL: "steam://rungameid/570" Installing breakpad exception handler for appid(steam)/version(1430266383) System startup time: 3.49 seconds /home/hs/.steam/root/steam.sh: line 161: VERSION_ID: unbound variable /home/hs/.steam/root/steam.sh: line 161: VERSION_ID: unbound variable Running Steam on arch 64-bit STEAM_RUNTIME has been set by the user to: /home/hs/.local/share/Steam/ubuntu12_32/steam-runtime ExecCommandLine: "/home/hs/.steam/root/ubuntu12_32/steam steam://open/driverhelperready" ExecSteamURL: "steam://open/driverhelperready" Generating new string page texture 66: 128x256, total string texture memory is 442.37 KB Installing breakpad exception handler for appid(steam)/version(1430266383) Game update: AppID 570 "Dota 2", ProcID 1777, IP 0.0.0.0:0 ERROR: ld.so: object '/home/hs/.local/share/Steam/ubuntu12_32/gameoverlayrenderer.so' from LD_PRELOAD cannot be preloaded (wrong ELF class: ELFCLASS32): ignored. ERROR: ld.so: object '/home/hs/.local/share/Steam/ubuntu12_32/gameoverlayrenderer.so' from LD_PRELOAD cannot be preloaded (wrong ELF class: ELFCLASS32): ignored. pid 1779 != 1778, skipping destruction (fork without exec?) ERROR: ld.so: object '/home/hs/.local/share/Steam/ubuntu12_32/gameoverlayrenderer.so' from LD_PRELOAD cannot be preloaded (wrong ELF class: ELFCLASS32): ignored. ERROR: ld.so: object '/home/hs/.local/share/Steam/ubuntu12_64/gameoverlayrenderer.so' from LD_PRELOAD cannot be preloaded (wrong ELF class: ELFCLASS64): ignored. SDL video target is 'x11' SDL failed to create GL compatibility profile (whichProfile=0! This system supports the OpenGL extension GL_EXT_framebuffer_object. This system supports the OpenGL extension GL_EXT_framebuffer_blit. This system supports the OpenGL extension GL_EXT_framebuffer_multisample. This system DOES NOT support the OpenGL extension GL_APPLE_fence. This system DOES NOT support the OpenGL extension GL_NV_fence. This system supports the OpenGL extension GL_ARB_sync. This system supports the OpenGL extension GL_EXT_draw_buffers2. This system DOES NOT support the OpenGL extension GL_EXT_bindable_uniform. This system DOES NOT support the OpenGL extension GL_APPLE_flush_buffer_range. This system supports the OpenGL extension GL_ARB_map_buffer_range. This system supports the OpenGL extension GL_ARB_vertex_buffer_object. This system supports the OpenGL extension GL_ARB_occlusion_query. This system DOES NOT support the OpenGL extension GL_APPLE_texture_range. This system DOES NOT support the OpenGL extension GL_APPLE_client_storage. This system DOES NOT support the OpenGL extension GL_ARB_uniform_buffer. This system supports the OpenGL extension GL_ARB_vertex_array_bgra. This system supports the OpenGL extension GL_EXT_vertex_array_bgra. This system supports the OpenGL extension GL_ARB_framebuffer_object. This system DOES NOT support the OpenGL extension GL_GREMEDY_string_marker. This system supports the OpenGL extension GL_ARB_debug_output. This system DOES NOT support the OpenGL extension GL_EXT_direct_state_access. This system DOES NOT support the OpenGL extension GL_NV_bindless_texture. This system DOES NOT support the OpenGL extension GL_AMD_pinned_memory. This system supports the OpenGL extension GL_EXT_framebuffer_multisample_blit_scaled. This system supports the OpenGL extension GL_EXT_texture_sRGB_decode. This system DOES NOT support the OpenGL extension GL_NVX_gpu_memory_info. This system DOES NOT support the OpenGL extension GL_ATI_meminfo. This system supports the OpenGL extension GL_EXT_texture_compression_s3tc. This system supports the OpenGL extension GL_EXT_texture_compression_dxt1. This system supports the OpenGL extension GL_ANGLE_texture_compression_dxt3. This system supports the OpenGL extension GL_ANGLE_texture_compression_dxt5. This system DOES NOT support the OpenGL extension GLX_EXT_swap_control_tear. GL_NV_bindless_texture: DISABLED GL_AMD_pinned_memory: DISABLED GL_EXT_texture_sRGB_decode: AVAILABLE VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 0, 0, [m0] ; C80A0002 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 0, 0, [m0] ; C80A0002 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 Installing breakpad exception handler for appid(gameoverlayui)/version(20150428164534) Installing breakpad exception handler for appid(gameoverlayui)/version(1.0) Installing breakpad exception handler for appid(gameoverlayui)/version(1.0) Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element Fontconfig warning: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 78: saw unknown, expected number Using breakpad crash handler Setting breakpad minidump AppID = 570 Forcing breakpad minidump interfaces to load Looking up breakpad interfaces from steamclient Calling BreakpadMiniDumpSystemInit Looking up breakpad interfaces from steamclient Calling BreakpadMiniDumpSystemInit Steam_SetMinidumpSteamID: Caching Steam ID: 76561197979947142 [API loaded yes] Steam_SetMinidumpSteamID: Setting Steam ID: 76561197979947142 Installing breakpad exception handler for appid(gameoverlayui)/version(1.0) GL_NVX_gpu_memory_info: UNAVAILABLE GL_ATI_meminfo: UNAVAILABLE GL_MAX_SAMPLES_EXT: 8 Adding VPK file: /home/hs/.local/share/Steam/steamapps/common/dota 2 beta/dota/sound_vo_english Adding VPK file: /home/hs/.local/share/Steam/steamapps/common/dota 2 beta/dota/pak01 Adding VPK file: /home/hs/.local/share/Steam/steamapps/common/dota 2 beta/platform/pak01 Did not detect any valid joysticks. WARNING: unable to link Test_StartScript and Test_StartScript because one or more is a ConCommand. WARNING: unable to link Test_RandomChance and Test_RandomChance because one or more is a ConCommand. WARNING: unable to link Test_LoopForNumSeconds and Test_LoopForNumSeconds because one or more is a ConCommand. WARNING: unable to link Test_Loop and Test_Loop because one or more is a ConCommand. WARNING: unable to link Test_LoopCount and Test_LoopCount because one or more is a ConCommand. WARNING: unable to link Test_StartLoop and Test_StartLoop because one or more is a ConCommand. WARNING: unable to link log_flags and log_flags because one or more is a ConCommand. WARNING: unable to link log_color and log_color because one or more is a ConCommand. WARNING: unable to link log_verbosity and log_verbosity because one or more is a ConCommand. WARNING: unable to link log_level and log_level because one or more is a ConCommand. WARNING: unable to link log_dumpchannels and log_dumpchannels because one or more is a ConCommand. Load a scaleform font provider? Creating D3D9 device with D3DCREATE_MULTITHREADED IDirect3DDevice9::Create: BackBufWidth: 2560, BackBufHeight: 1440, D3DFMT: 3, BackBufCount: 1, MultisampleType: 0, MultisampleQuality: 0 GL sampler object usage: DISABLED FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..3] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IMM[0].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %13, %33 %38 = fmul float %14, %33 %39 = fmul float %15, %33 %40 = fmul float %16, %33 %41 = fmul float %17, %34 %42 = fadd float %41, %37 %43 = fmul float %18, %34 %44 = fadd float %43, %38 %45 = fmul float %19, %34 %46 = fadd float %45, %39 %47 = fmul float %20, %34 %48 = fadd float %47, %40 %49 = fmul float %21, %35 %50 = fadd float %49, %42 %51 = fmul float %22, %35 %52 = fadd float %51, %44 %53 = fmul float %23, %35 %54 = fadd float %53, %46 %55 = fmul float %24, %35 %56 = fadd float %55, %48 %57 = fmul float %25, %36 %58 = fadd float %57, %50 %59 = fmul float %26, %36 %60 = fadd float %59, %52 %61 = fmul float %27, %36 %62 = fadd float %61, %54 %63 = fmul float %28, %36 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v1, v4 ; D2820004 04120204 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v2, v4 ; D2820004 04120404 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v3, v4 ; D2820004 04120604 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v3, v5 ; D2820005 04160604 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v0 ; 100C0004 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v3, v6 ; D2820006 041A0604 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v0 ; 100E0004 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v1, v7 ; D2820007 041E0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v2, v7 ; D2820007 041E0404 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v3, v7 ; D2820000 041E0600 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], CUBE 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %11) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %11) %30 = insertelement <4 x float> undef, float %26, i32 0 %31 = insertelement <4 x float> %30, float %27, i32 1 %32 = insertelement <4 x float> %31, float %28, i32 2 %33 = insertelement <4 x float> %32, float %29, i32 3 %34 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %33) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call float @fabs(float %37) %40 = fdiv float 1.000000e+00, %39 %41 = fmul float %35, %40 %42 = fadd float %41, 1.500000e+00 %43 = fmul float %36, %40 %44 = fadd float %43, 1.500000e+00 %45 = bitcast float %44 to i32 %46 = bitcast float %42 to i32 %47 = bitcast float %38 to i32 %48 = insertelement <4 x i32> undef, i32 %45, i32 0 %49 = insertelement <4 x i32> %48, i32 %46, i32 1 %50 = insertelement <4 x i32> %49, i32 %47, i32 2 %51 = insertelement <4 x i32> %50, i32 undef, i32 3 %52 = bitcast <8 x i32> %23 to <32 x i8> %53 = bitcast <4 x i32> %25 to <16 x i8> %54 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %51, <32 x i8> %52, <16 x i8> %53, i32 4) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = call i32 @llvm.SI.packf16(float %55, float %56) %60 = bitcast i32 %59 to float %61 = call i32 @llvm.SI.packf16(float %57, float %58) %62 = bitcast i32 %61 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %60, float %62, float %60, float %62) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_cubeid_f32 v9, v2, v3, v4 ; D2880009 04120702 v_cubema_f32 v8, v2, v3, v4 ; D28E0008 04120702 v_cubesc_f32 v7, v2, v3, v4 ; D28A0007 04120702 v_cubetc_f32 v6, v2, v3, v4 ; D28C0006 04120702 v_rcp_f32_e64 v0, |v8| ; D3540100 00000108 v_mov_b32_e32 v1, 0x3fc00000 ; 7E0202FF 3FC00000 v_mad_f32 v8, v6, v0, v1 ; D2820008 04060106 v_mad_f32 v7, v7, v0, v1 ; D2820007 04060107 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[4:11], s[0:3] ; F0800F00 00010007 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..3] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IMM[0].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %13, %33 %38 = fmul float %14, %33 %39 = fmul float %15, %33 %40 = fmul float %16, %33 %41 = fmul float %17, %34 %42 = fadd float %41, %37 %43 = fmul float %18, %34 %44 = fadd float %43, %38 %45 = fmul float %19, %34 %46 = fadd float %45, %39 %47 = fmul float %20, %34 %48 = fadd float %47, %40 %49 = fmul float %21, %35 %50 = fadd float %49, %42 %51 = fmul float %22, %35 %52 = fadd float %51, %44 %53 = fmul float %23, %35 %54 = fadd float %53, %46 %55 = fmul float %24, %35 %56 = fadd float %55, %48 %57 = fmul float %25, %36 %58 = fadd float %57, %50 %59 = fmul float %26, %36 %60 = fadd float %59, %52 %61 = fmul float %27, %36 %62 = fadd float %61, %54 %63 = fmul float %28, %36 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v1, v4 ; D2820004 04120204 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v2, v4 ; D2820004 04120404 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v3, v4 ; D2820004 04120604 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v3, v5 ; D2820005 04160604 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v0 ; 100C0004 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v3, v6 ; D2820006 041A0604 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v0 ; 100E0004 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v1, v7 ; D2820007 041E0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v2, v7 ; D2820007 041E0404 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v3, v7 ; D2820000 041E0600 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzz 1: TEX TEMP[0], TEMP[0], SAMP[0], CUBE 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = insertelement <4 x float> undef, float %26, i32 0 %30 = insertelement <4 x float> %29, float %27, i32 1 %31 = insertelement <4 x float> %30, float %28, i32 2 %32 = insertelement <4 x float> %31, float 0.000000e+00, i32 3 %33 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = call float @fabs(float %36) %39 = fdiv float 1.000000e+00, %38 %40 = fmul float %34, %39 %41 = fadd float %40, 1.500000e+00 %42 = fmul float %35, %39 %43 = fadd float %42, 1.500000e+00 %44 = bitcast float %43 to i32 %45 = bitcast float %41 to i32 %46 = bitcast float %37 to i32 %47 = insertelement <4 x i32> undef, i32 %44, i32 0 %48 = insertelement <4 x i32> %47, i32 %45, i32 1 %49 = insertelement <4 x i32> %48, i32 %46, i32 2 %50 = insertelement <4 x i32> %49, i32 undef, i32 3 %51 = bitcast <8 x i32> %23 to <32 x i8> %52 = bitcast <4 x i32> %25 to <16 x i8> %53 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %50, <32 x i8> %51, <16 x i8> %52, i32 4) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = call i32 @llvm.SI.packf16(float %54, float %55) %59 = bitcast i32 %58 to float %60 = call i32 @llvm.SI.packf16(float %56, float %57) %61 = bitcast i32 %60 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %59, float %61, float %59, float %61) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_cubeid_f32 v9, v2, v3, v4 ; D2880009 04120702 v_cubema_f32 v8, v2, v3, v4 ; D28E0008 04120702 v_cubesc_f32 v7, v2, v3, v4 ; D28A0007 04120702 v_cubetc_f32 v6, v2, v3, v4 ; D28C0006 04120702 v_rcp_f32_e64 v0, |v8| ; D3540100 00000108 v_mov_b32_e32 v1, 0x3fc00000 ; 7E0202FF 3FC00000 v_mad_f32 v8, v6, v0, v1 ; D2820008 04060106 v_mad_f32 v7, v7, v0, v1 ; D2820007 04060107 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[4:11], s[0:3] ; F0800F00 00010007 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[0] 2: DP4 TEMP[1].x, IN[2], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[2], IN[1] 5: MOV OUT[1], IN[0] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %13 %46 = fmul float %42, %14 %47 = fadd float %45, %46 %48 = fmul float %43, %15 %49 = fadd float %47, %48 %50 = fmul float %44, %16 %51 = fadd float %49, %50 %52 = fmul float %41, %17 %53 = fmul float %42, %18 %54 = fadd float %52, %53 %55 = fmul float %43, %19 %56 = fadd float %54, %55 %57 = fmul float %44, %20 %58 = fadd float %56, %57 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v4, v2, v1 ; F80008CF 01020400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v3, v0, 3, 1, [m0] ; C80C0700 v_interp_p2_f32 v3, [v3], v1, 3, 1, [m0] ; C80D0701 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_cvt_pkrtz_f16_f32_e32 v0, v4, v3 ; 5E000704 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 ##### swap interval = 1 swap limit = 1 ##### VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[0] 2: DP4 TEMP[1].x, IN[1], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[2] 5: DP4 TEMP[2].x, IN[1], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[0], TEMP[0] 10: MOV OUT[2], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %13 %46 = fmul float %42, %14 %47 = fadd float %45, %46 %48 = fmul float %43, %15 %49 = fadd float %47, %48 %50 = fmul float %44, %16 %51 = fadd float %49, %50 %52 = fmul float %41, %17 %53 = fmul float %42, %18 %54 = fadd float %52, %53 %55 = fmul float %43, %19 %56 = fadd float %54, %55 %57 = fmul float %44, %20 %58 = fadd float %56, %57 %59 = fmul float %41, %21 %60 = fmul float %42, %22 %61 = fadd float %59, %60 %62 = fmul float %43, %23 %63 = fadd float %61, %62 %64 = fmul float %44, %24 %65 = fadd float %63, %64 %66 = fmul float %41, %25 %67 = fmul float %42, %26 %68 = fadd float %66, %67 %69 = fmul float %43, %27 %70 = fadd float %68, %69 %71 = fmul float %44, %28 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 33, 0, 0, 0, v5, v4, v6, v6 ; F800021F 06060405 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { -0.5020, 1.5960, -0.8130, 0.0000} IMM[1] FLT32 { -0.0627, 1.1640, 1.0000, -0.3920} IMM[2] FLT32 { 0.0000, -0.3920, 2.0170, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[2], 2D 2: ADD TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 5: ADD TEMP[1].x, TEMP[1].wwww, IMM[1].xxxx 6: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 7: MAD TEMP[0], TEMP[0].xxxx, IMM[0].yzww, TEMP[1].xxxx 8: MOV TEMP[1].xy, IN[1].xyyy 9: TEX TEMP[1].w, TEMP[1], SAMP[1], 2D 10: ADD TEMP[1].x, TEMP[1].wwww, IMM[0].xxxx 11: MAD TEMP[0].xyz, TEMP[1].xxxx, IMM[2].xyzx, TEMP[0] 12: MOV TEMP[0].xyz, TEMP[0].xyzx 13: MOV TEMP[0].w, IN[0].wwww 14: MOV OUT[0], TEMP[0] 15: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = bitcast <8 x i32> %31 to <32 x i8> %42 = bitcast <4 x i32> %33 to <16 x i8> %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %41, <16 x i8> %42, i32 2) %44 = extractelement <4 x float> %43, i32 3 %45 = fadd float %44, 0xBFE0101020000000 %46 = bitcast float %35 to i32 %47 = bitcast float %36 to i32 %48 = insertelement <2 x i32> undef, i32 %46, i32 0 %49 = insertelement <2 x i32> %48, i32 %47, i32 1 %50 = bitcast <8 x i32> %23 to <32 x i8> %51 = bitcast <4 x i32> %25 to <16 x i8> %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %50, <16 x i8> %51, i32 2) %53 = extractelement <4 x float> %52, i32 3 %54 = fadd float %53, 0xBFB0101020000000 %55 = fmul float %54, 0x3FF29FBE80000000 %56 = fmul float %45, 0x3FF9893740000000 %57 = fadd float %56, %55 %58 = fmul float %45, 0xBFEA0418A0000000 %59 = fadd float %58, %55 %60 = fmul float %45, 0.000000e+00 %61 = fadd float %60, %55 %62 = bitcast float %35 to i32 %63 = bitcast float %36 to i32 %64 = insertelement <2 x i32> undef, i32 %62, i32 0 %65 = insertelement <2 x i32> %64, i32 %63, i32 1 %66 = bitcast <8 x i32> %27 to <32 x i8> %67 = bitcast <4 x i32> %29 to <16 x i8> %68 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %66, <16 x i8> %67, i32 2) %69 = extractelement <4 x float> %68, i32 3 %70 = fadd float %69, 0xBFE0101020000000 %71 = fmul float %70, 0.000000e+00 %72 = fadd float %71, %57 %73 = fmul float %70, 0xBFD9168720000000 %74 = fadd float %73, %59 %75 = fmul float %70, 0x400022D0E0000000 %76 = fadd float %75, %61 %77 = call i32 @llvm.SI.packf16(float %72, float %74) %78 = bitcast i32 %77 to float %79 = call i32 @llvm.SI.packf16(float %76, float %34) %80 = bitcast i32 %79 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %78, float %80, float %78, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[32:39], s[6:7], 0x10 ; C0D00710 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v4, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[12:15] ; F0800800 00680402 v_mov_b32_e32 v5, 0xbf008081 ; 7E0A02FF BF008081 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v5, v4 ; 06080905 image_sample v6, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[0:3] ; F0800800 00040602 v_mov_b32_e32 v7, 0xbd808081 ; 7E0E02FF BD808081 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v6, v7 ; 060C0F06 v_mul_f32_e32 v6, 0x3f94fdf4, v6 ; 100C0CFF 3F94FDF4 v_mov_b32_e32 v7, 0xbf5020c5 ; 7E0E02FF BF5020C5 v_mad_f32 v7, v7, v4, v6 ; D2820007 041A0907 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[8:11] ; F0800800 00460202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v2, v5, v2 ; 06040505 v_mov_b32_e32 v3, 0xbec8b439 ; 7E0602FF BEC8B439 v_mad_f32 v3, v3, v2, v7 ; D2820003 041E0503 v_mov_b32_e32 v5, 0x3fcc49ba ; 7E0A02FF 3FCC49BA v_mad_f32 v5, v5, v4, v6 ; D2820005 041A0905 v_mad_f32 v5, 0, v2, v5 ; D2820005 04160480 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_mad_f32 v4, 0, v4, v6 ; D2820004 041A0880 v_mov_b32_e32 v5, 0x40011687 ; 7E0A02FF 40011687 v_mad_f32 v2, v5, v2, v4 ; D2820002 04120505 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_cvt_pkrtz_f16_f32_e32 v0, v2, v4 ; 5E000902 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %35, float %36, float %37, float %38) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[9] DCL OUT[3], GENERIC[10] DCL OUT[4], GENERIC[11] DCL CONST[0..70] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 2.2000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: ABS TEMP[1].x, CONST[50].xxxx 2: FSLT TEMP[2].x, -TEMP[1].xxxx, TEMP[1].xxxx 3: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 4: ADD TEMP[1].xyz, IN[2].xyzz, IN[2].xyzz 5: LG2 TEMP[3].x, TEMP[1].xxxx 6: LG2 TEMP[4].x, TEMP[1].yyyy 7: MOV TEMP[3].y, TEMP[4].xxxx 8: LG2 TEMP[4].x, TEMP[1].zzzz 9: MOV TEMP[3].z, TEMP[4].xxxx 10: MUL TEMP[1].xyz, TEMP[3].xyzz, IMM[0].zzzz 11: EX2 TEMP[3].x, TEMP[1].xxxx 12: EX2 TEMP[4].x, TEMP[1].yyyy 13: MOV TEMP[3].y, TEMP[4].xxxx 14: EX2 TEMP[4].x, TEMP[1].zzzz 15: MOV TEMP[3].z, TEMP[4].xxxx 16: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[3].xyzz 17: DP4 TEMP[0].x, IN[3], CONST[48] 18: DP4 TEMP[4].x, IN[3], CONST[49] 19: MOV TEMP[0].y, TEMP[4].xxxx 20: MAD TEMP[1].xyz, IN[4].xyzz, CONST[66].xxxx, IN[0].xyzz 21: MOV TEMP[1].w, IN[0].wwww 22: DP4 TEMP[3].x, TEMP[1], CONST[67] 23: DP4 TEMP[4].x, TEMP[1], CONST[68] 24: MOV TEMP[3].y, TEMP[4].xxxx 25: DP4 TEMP[1].x, TEMP[1], CONST[69] 26: MOV TEMP[3].z, TEMP[1].xxxx 27: MOV TEMP[3].w, CONST[0].yyyy 28: DP4 TEMP[1].x, TEMP[3], CONST[8] 29: DP4 TEMP[4].x, TEMP[3], CONST[9] 30: MOV TEMP[1].y, TEMP[4].xxxx 31: DP4 TEMP[5].x, TEMP[3], CONST[10] 32: MOV TEMP[1].z, TEMP[5].xxxx 33: DP4 TEMP[6].x, TEMP[3], CONST[11] 34: MOV TEMP[1].w, TEMP[6].xxxx 35: DP4 TEMP[7].x, TEMP[3], CONST[13] 36: MOV TEMP[7].w, TEMP[7].xxxx 37: MOV TEMP[7].xyz, TEMP[3].xyzx 38: MOV TEMP[2].w, CONST[0].xxxx 39: MOV TEMP[3], TEMP[1] 40: MAD TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz, -TEMP[6].xxxx 41: MOV TEMP[1].z, TEMP[5].xxxx 42: MOV TEMP[1].y, -TEMP[4].xxxx 43: MAD TEMP[1].xy, CONST[70].xyyy, TEMP[6].xxxx, TEMP[1].xyyy 44: MOV OUT[2], TEMP[0] 45: MOV OUT[0], TEMP[1] 46: MOV OUT[1], TEMP[3] 47: MOV OUT[3], TEMP[2] 48: MOV OUT[4], TEMP[7] 49: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 800) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1056) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1072) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1076) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1080) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1084) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1088) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1092) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1096) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1100) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1104) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1108) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1112) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1116) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1120) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1124) %60 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = extractelement <4 x float> %63, i32 2 %67 = extractelement <4 x float> %63, i32 3 %68 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %69 = load <16 x i8> addrspace(2)* %68, !tbaa !0 %70 = add i32 %5, %7 %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %70) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = extractelement <4 x float> %71, i32 2 %75 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0 %77 = add i32 %5, %7 %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %77) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = extractelement <4 x float> %78, i32 3 %83 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %84 = load <16 x i8> addrspace(2)* %83, !tbaa !0 %85 = add i32 %5, %7 %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %85) %87 = extractelement <4 x float> %86, i32 0 %88 = extractelement <4 x float> %86, i32 1 %89 = extractelement <4 x float> %86, i32 2 %90 = call float @fabs(float %44) %91 = fsub float -0.000000e+00, %90 %92 = fcmp olt float %91, %90 %93 = sext i1 %92 to i32 %94 = bitcast i32 %93 to float %95 = bitcast float %94 to i32 %96 = and i32 %95, 1065353216 %97 = bitcast i32 %96 to float %98 = fadd float %72, %72 %99 = fadd float %73, %73 %100 = fadd float %74, %74 %101 = call float @llvm.log2.f32(float %98) %102 = call float @llvm.log2.f32(float %99) %103 = call float @llvm.log2.f32(float %100) %104 = fmul float %101, 0x40019999A0000000 %105 = fmul float %102, 0x40019999A0000000 %106 = fmul float %103, 0x40019999A0000000 %107 = call float @llvm.AMDIL.exp.(float %104) %108 = call float @llvm.AMDIL.exp.(float %105) %109 = call float @llvm.AMDIL.exp.(float %106) %110 = fmul float %97, %107 %111 = fmul float %97, %108 %112 = fmul float %97, %109 %113 = fmul float %79, %36 %114 = fmul float %80, %37 %115 = fadd float %113, %114 %116 = fmul float %81, %38 %117 = fadd float %115, %116 %118 = fmul float %82, %39 %119 = fadd float %117, %118 %120 = fmul float %79, %40 %121 = fmul float %80, %41 %122 = fadd float %120, %121 %123 = fmul float %81, %42 %124 = fadd float %122, %123 %125 = fmul float %82, %43 %126 = fadd float %124, %125 %127 = fmul float %87, %45 %128 = fadd float %127, %64 %129 = fmul float %88, %45 %130 = fadd float %129, %65 %131 = fmul float %89, %45 %132 = fadd float %131, %66 %133 = fmul float %128, %46 %134 = fmul float %130, %47 %135 = fadd float %133, %134 %136 = fmul float %132, %48 %137 = fadd float %135, %136 %138 = fmul float %67, %49 %139 = fadd float %137, %138 %140 = fmul float %128, %50 %141 = fmul float %130, %51 %142 = fadd float %140, %141 %143 = fmul float %132, %52 %144 = fadd float %142, %143 %145 = fmul float %67, %53 %146 = fadd float %144, %145 %147 = fmul float %128, %54 %148 = fmul float %130, %55 %149 = fadd float %147, %148 %150 = fmul float %132, %56 %151 = fadd float %149, %150 %152 = fmul float %67, %57 %153 = fadd float %151, %152 %154 = fmul float %139, %16 %155 = fmul float %146, %17 %156 = fadd float %154, %155 %157 = fmul float %153, %18 %158 = fadd float %156, %157 %159 = fmul float %14, %19 %160 = fadd float %158, %159 %161 = fmul float %139, %20 %162 = fmul float %146, %21 %163 = fadd float %161, %162 %164 = fmul float %153, %22 %165 = fadd float %163, %164 %166 = fmul float %14, %23 %167 = fadd float %165, %166 %168 = fmul float %139, %24 %169 = fmul float %146, %25 %170 = fadd float %168, %169 %171 = fmul float %153, %26 %172 = fadd float %170, %171 %173 = fmul float %14, %27 %174 = fadd float %172, %173 %175 = fmul float %139, %28 %176 = fmul float %146, %29 %177 = fadd float %175, %176 %178 = fmul float %153, %30 %179 = fadd float %177, %178 %180 = fmul float %14, %31 %181 = fadd float %179, %180 %182 = fmul float %139, %32 %183 = fmul float %146, %33 %184 = fadd float %182, %183 %185 = fmul float %153, %34 %186 = fadd float %184, %185 %187 = fmul float %14, %35 %188 = fadd float %186, %187 %189 = fsub float -0.000000e+00, %181 %190 = fmul float %174, %15 %191 = fadd float %190, %189 %192 = fsub float -0.000000e+00, %167 %193 = fmul float %58, %181 %194 = fadd float %193, %160 %195 = fmul float %59, %181 %196 = fadd float %195, %192 %197 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 16 %198 = load <16 x i8> addrspace(2)* %197, !tbaa !0 %199 = call float @llvm.SI.load.const(<16 x i8> %198, i32 0) %200 = fmul float %199, %160 %201 = call float @llvm.SI.load.const(<16 x i8> %198, i32 4) %202 = fmul float %201, %167 %203 = fadd float %200, %202 %204 = call float @llvm.SI.load.const(<16 x i8> %198, i32 8) %205 = fmul float %204, %174 %206 = fadd float %203, %205 %207 = call float @llvm.SI.load.const(<16 x i8> %198, i32 12) %208 = fmul float %207, %181 %209 = fadd float %206, %208 %210 = call float @llvm.SI.load.const(<16 x i8> %198, i32 16) %211 = fmul float %210, %160 %212 = call float @llvm.SI.load.const(<16 x i8> %198, i32 20) %213 = fmul float %212, %167 %214 = fadd float %211, %213 %215 = call float @llvm.SI.load.const(<16 x i8> %198, i32 24) %216 = fmul float %215, %174 %217 = fadd float %214, %216 %218 = call float @llvm.SI.load.const(<16 x i8> %198, i32 28) %219 = fmul float %218, %181 %220 = fadd float %217, %219 %221 = call float @llvm.SI.load.const(<16 x i8> %198, i32 32) %222 = fmul float %221, %160 %223 = call float @llvm.SI.load.const(<16 x i8> %198, i32 36) %224 = fmul float %223, %167 %225 = fadd float %222, %224 %226 = call float @llvm.SI.load.const(<16 x i8> %198, i32 40) %227 = fmul float %226, %174 %228 = fadd float %225, %227 %229 = call float @llvm.SI.load.const(<16 x i8> %198, i32 44) %230 = fmul float %229, %181 %231 = fadd float %228, %230 %232 = call float @llvm.SI.load.const(<16 x i8> %198, i32 48) %233 = fmul float %232, %160 %234 = call float @llvm.SI.load.const(<16 x i8> %198, i32 52) %235 = fmul float %234, %167 %236 = fadd float %233, %235 %237 = call float @llvm.SI.load.const(<16 x i8> %198, i32 56) %238 = fmul float %237, %174 %239 = fadd float %236, %238 %240 = call float @llvm.SI.load.const(<16 x i8> %198, i32 60) %241 = fmul float %240, %181 %242 = fadd float %239, %241 %243 = call float @llvm.SI.load.const(<16 x i8> %198, i32 64) %244 = fmul float %243, %160 %245 = call float @llvm.SI.load.const(<16 x i8> %198, i32 68) %246 = fmul float %245, %167 %247 = fadd float %244, %246 %248 = call float @llvm.SI.load.const(<16 x i8> %198, i32 72) %249 = fmul float %248, %174 %250 = fadd float %247, %249 %251 = call float @llvm.SI.load.const(<16 x i8> %198, i32 76) %252 = fmul float %251, %181 %253 = fadd float %250, %252 %254 = call float @llvm.SI.load.const(<16 x i8> %198, i32 80) %255 = fmul float %254, %160 %256 = call float @llvm.SI.load.const(<16 x i8> %198, i32 84) %257 = fmul float %256, %167 %258 = fadd float %255, %257 %259 = call float @llvm.SI.load.const(<16 x i8> %198, i32 88) %260 = fmul float %259, %174 %261 = fadd float %258, %260 %262 = call float @llvm.SI.load.const(<16 x i8> %198, i32 92) %263 = fmul float %262, %181 %264 = fadd float %261, %263 %265 = call float @llvm.SI.load.const(<16 x i8> %198, i32 96) %266 = fmul float %265, %160 %267 = call float @llvm.SI.load.const(<16 x i8> %198, i32 100) %268 = fmul float %267, %167 %269 = fadd float %266, %268 %270 = call float @llvm.SI.load.const(<16 x i8> %198, i32 104) %271 = fmul float %270, %174 %272 = fadd float %269, %271 %273 = call float @llvm.SI.load.const(<16 x i8> %198, i32 108) %274 = fmul float %273, %181 %275 = fadd float %272, %274 %276 = call float @llvm.SI.load.const(<16 x i8> %198, i32 112) %277 = fmul float %276, %160 %278 = call float @llvm.SI.load.const(<16 x i8> %198, i32 116) %279 = fmul float %278, %167 %280 = fadd float %277, %279 %281 = call float @llvm.SI.load.const(<16 x i8> %198, i32 120) %282 = fmul float %281, %174 %283 = fadd float %280, %282 %284 = call float @llvm.SI.load.const(<16 x i8> %198, i32 124) %285 = fmul float %284, %181 %286 = fadd float %283, %285 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %119, float %126, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %110, float %111, float %112, float %13) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %139, float %146, float %153, float %188) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %194, float %196, float %191, float %181) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %209, float %220, float %231, float %242) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %253, float %264, float %275, float %286) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[4:7], s[8:9], 0xc ; C082090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s0, s[4:7], 0xc5 ; C20005C5 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s0, v2 ; 100A0400 s_buffer_load_dword s0, s[4:7], 0xc4 ; C20005C4 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s0, v5 ; D2820005 04140101 s_buffer_load_dword s0, s[4:7], 0xc6 ; C20005C6 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s0, v5 ; D2820005 04140103 s_buffer_load_dword s0, s[4:7], 0xc7 ; C20005C7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s0, v5 ; D2820005 04140104 s_buffer_load_dword s0, s[4:7], 0xc1 ; C20005C1 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s0, v2 ; 100C0400 s_buffer_load_dword s0, s[4:7], 0xc0 ; C20005C0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s0, v6 ; D2820006 04180101 s_buffer_load_dword s0, s[4:7], 0xc2 ; C20005C2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s0, v6 ; D2820006 04180103 s_buffer_load_dword s0, s[4:7], 0xc3 ; C20005C3 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s0, v6 ; D2820001 04180104 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v1, v5, v2, v2 ; F800020F 02020501 s_buffer_load_dword s0, s[4:7], 0xc8 ; C20005C8 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_cmp_lt_f32_e64 s[0:1], -|s0|, |s0| ; D0020300 20000000 v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; D2000801 00018280 v_and_b32_e32 v1, 1.0, v1 ; 360202F2 buffer_load_format_xyzw v[2:5], v0, s[16:19], 0 idxen ; E00C2000 80040200 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v4, v4 ; 060C0904 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_mul_f32_e32 v6, 0x400ccccd, v6 ; 100C0CFF 400CCCCD v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v6, v6, v1 ; 100C0306 v_add_f32_e32 v7, v3, v3 ; 060E0703 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_mul_f32_e32 v7, 0x400ccccd, v7 ; 100E0EFF 400CCCCD v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_mul_f32_e32 v7, v7, v1 ; 100E0307 v_add_f32_e32 v2, v2, v2 ; 06040502 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_f32_e32 v2, 0x400ccccd, v2 ; 100404FF 400CCCCD v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s0, s[4:7], 0x0 ; C2000500 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s0 ; 7E040200 exp 15, 33, 0, 0, 0, v1, v7, v6, v2 ; F800021F 02060701 s_movk_i32 s0, 0x420 ; B0000420 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v0, v5, s0, v1 ; D2820000 04040105 v_mad_f32 v9, v6, s0, v2 ; D2820009 04080106 s_movk_i32 s1, 0x434 ; B0010434 s_buffer_load_dword s1, s[4:7], s1 ; C2008401 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s1, v9 ; 10141201 s_movk_i32 s1, 0x430 ; B0010430 s_buffer_load_dword s1, s[4:7], s1 ; C2008401 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v0, s1, v10 ; D282000A 04280300 v_mad_f32 v5, v7, s0, v3 ; D2820005 040C0107 s_movk_i32 s0, 0x438 ; B0000438 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s0, v10 ; D2820006 04280105 s_movk_i32 s0, 0x43c ; B000043C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s0, v6 ; D2820006 04180104 s_movk_i32 s0, 0x444 ; B0000444 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v9 ; 100E1200 s_movk_i32 s0, 0x440 ; B0000440 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s0, v7 ; D2820007 041C0100 s_movk_i32 s0, 0x448 ; B0000448 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v5, s0, v7 ; D2820007 041C0105 s_movk_i32 s0, 0x44c ; B000044C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v4, s0, v7 ; D2820007 041C0104 s_buffer_load_dword s0, s[4:7], 0x35 ; C2000535 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s0, v7 ; 10100E00 s_buffer_load_dword s0, s[4:7], 0x34 ; C2000534 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v6, s0, v8 ; D2820008 04200106 s_movk_i32 s0, 0x454 ; B0000454 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s0, v9 ; 10121200 s_movk_i32 s0, 0x450 ; B0000450 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v9 ; D2820000 04240100 s_movk_i32 s0, 0x458 ; B0000458 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s0, v0 ; D2820000 04000105 s_movk_i32 s0, 0x45c ; B000045C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s0, v0 ; D2820000 04000104 s_buffer_load_dword s0, s[4:7], 0x36 ; C2000536 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s0, v8 ; D2820001 04200100 s_buffer_load_dword s0, s[4:7], 0x37 ; C2000537 s_buffer_load_dword s1, s[4:7], 0x1 ; C2008501 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s0 ; 7E040200 v_mad_f32 v1, v2, s1, v1 ; D2820001 04040302 exp 15, 34, 0, 0, 0, v6, v7, v0, v1 ; F800022F 01000706 s_buffer_load_dword s0, s[4:7], 0x2d ; C200052D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s0, v7 ; 10020E00 s_buffer_load_dword s0, s[4:7], 0x2c ; C200052C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v6, s0, v1 ; D2820001 04040106 s_buffer_load_dword s0, s[4:7], 0x2e ; C200052E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s0, v1 ; D2820001 04040100 s_buffer_load_dword s0, s[4:7], 0x2f ; C200052F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s0 ; 7E040200 v_mad_f32 v1, v2, s1, v1 ; D2820001 04040302 s_buffer_load_dword s0, s[4:7], 0x29 ; C2000529 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s0, v7 ; 10040E00 s_buffer_load_dword s0, s[4:7], 0x28 ; C2000528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v6, s0, v2 ; D2820002 04080106 s_buffer_load_dword s0, s[4:7], 0x2a ; C200052A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s0, v2 ; D2820002 04080100 s_buffer_load_dword s0, s[4:7], 0x2b ; C200052B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s0 ; 7E060200 v_mad_f32 v2, v3, s1, v2 ; D2820002 04080303 s_buffer_load_dword s0, s[4:7], 0x2 ; C2000502 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v2, s0, -v1 ; D2820003 84040102 s_buffer_load_dword s0, s[4:7], 0x25 ; C2000525 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s0, v7 ; 10080E00 s_buffer_load_dword s0, s[4:7], 0x24 ; C2000524 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s0, v4 ; D2820004 04100106 s_buffer_load_dword s0, s[4:7], 0x26 ; C2000526 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s0, v4 ; D2820004 04100100 s_buffer_load_dword s0, s[4:7], 0x27 ; C2000527 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s0 ; 7E0A0200 v_mad_f32 v4, v5, s1, v4 ; D2820004 04100305 s_movk_i32 s0, 0x464 ; B0000464 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s0, v1, -v4 ; D2820005 84120200 s_buffer_load_dword s0, s[4:7], 0x21 ; C2000521 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v7 ; 100E0E00 s_buffer_load_dword s0, s[4:7], 0x20 ; C2000520 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v6, s0, v7 ; D2820006 041C0106 s_buffer_load_dword s0, s[4:7], 0x22 ; C2000522 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v6 ; D2820000 04180100 s_buffer_load_dword s0, s[4:7], 0x23 ; C2000523 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mad_f32 v0, v6, s1, v0 ; D2820000 04000306 s_movk_i32 s0, 0x460 ; B0000460 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s0, v1, v0 ; D2820006 04020200 exp 15, 12, 0, 0, 0, v6, v5, v3, v1 ; F80000CF 01030506 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, v3 ; D2820003 040E0404 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v1, v3 ; D2820003 040E0204 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v4 ; 100C0804 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v0, v6 ; D2820006 041A0004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v4 ; 100E0804 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v0, v7 ; D2820007 041E0004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v2, v7 ; D2820007 041E0404 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v1, v7 ; D2820007 041E0204 exp 15, 13, 0, 0, 0, v7, v6, v5, v3 ; F80000DF 03050607 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, v3 ; D2820003 040E0404 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v1, v3 ; D2820003 040E0204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v4 ; 100C0804 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v0, v6 ; D2820006 041A0004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v0, v4 ; D2820000 04120004 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v2, v0 ; D2820000 04020404 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v1, v0 ; D2820000 04020200 exp 15, 14, 0, 1, 0, v0, v6, v5, v3 ; F80008EF 03050600 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..30] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { -1.0000, 1.0000, 0.0000, 0.0000} 0: ABS TEMP[0].x, CONST[12].yyyy 1: MUL TEMP[1].x, CONST[29].wwww, IN[2].wwww 2: MOV TEMP[2].xy, IN[0].xyyy 3: TEX TEMP[2], TEMP[2], SAMP[0], 2D 4: ADD TEMP[3].x, TEMP[2].wwww, IMM[0].xxxx 5: MAD TEMP[3].x, CONST[20].wwww, TEMP[3].xxxx, IMM[0].yyyy 6: MUL TEMP[3].x, TEMP[3].xxxx, CONST[1].wwww 7: MAD TEMP[4].x, TEMP[3].xxxx, IN[1].wwww, -TEMP[3].xxxx 8: MAD TEMP[3].x, CONST[12].wwww, TEMP[4].xxxx, TEMP[3].xxxx 9: FSGE TEMP[4].x, -TEMP[0].xxxx, IMM[0].zzzz 10: UIF TEMP[4].xxxx :0 11: MOV TEMP[4].x, TEMP[3].xxxx 12: ELSE :0 13: MOV TEMP[4].x, TEMP[1].xxxx 14: ENDIF 15: MOV TEMP[3].w, TEMP[4].xxxx 16: ADD TEMP[0].xyz, -TEMP[2].xyzz, IMM[0].yyyy 17: MAD TEMP[0].xyz, CONST[12].zzzz, TEMP[0].xyzz, TEMP[2].xyzz 18: ADD TEMP[4].x, TEMP[2].wwww, CONST[12].xxxx 19: MOV_SAT TEMP[4].x, TEMP[4].xxxx 20: ADD TEMP[1].xyz, IMM[0].xxxx, CONST[1].xyzz 21: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[1].xyzz, IMM[0].yyyy 22: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz 23: MUL TEMP[1].xyz, TEMP[0].xyzz, CONST[30].xxxx 24: MAD TEMP[0].xyz, TEMP[0].xyzz, -CONST[30].xxxx, CONST[29].xyzz 25: ADD TEMP[2].xyz, CONST[20].xyzz, -IN[2].xyzz 26: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 27: RSQ TEMP[4].x, TEMP[2].xxxx 28: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[2].xxxx 29: CMP TEMP[4].x, -TEMP[2].xxxx, TEMP[4].xxxx, IMM[0].zzzz 30: MAD TEMP[2].x, TEMP[4].xxxx, CONST[21].wwww, CONST[21].xxxx 31: MOV_SAT TEMP[2].x, TEMP[2].xxxx 32: MIN TEMP[2].x, TEMP[2].xxxx, CONST[21].zzzz 33: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 34: MAD TEMP[3].xyz, TEMP[2].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 35: MOV OUT[0], TEMP[3] 36: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 332) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 348) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 464) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 468) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 472) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 476) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 480) %44 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %55 = call float @fabs(float %29) %56 = fmul float %42, %54 %57 = bitcast float %48 to i32 %58 = bitcast float %49 to i32 %59 = insertelement <2 x i32> undef, i32 %57, i32 0 %60 = insertelement <2 x i32> %59, i32 %58, i32 1 %61 = bitcast <8 x i32> %45 to <32 x i8> %62 = bitcast <4 x i32> %47 to <16 x i8> %63 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %60, <32 x i8> %61, <16 x i8> %62, i32 2) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = extractelement <4 x float> %63, i32 2 %67 = extractelement <4 x float> %63, i32 3 %68 = fadd float %67, -1.000000e+00 %69 = fmul float %35, %68 %70 = fadd float %69, 1.000000e+00 %71 = fmul float %70, %27 %72 = fsub float -0.000000e+00, %71 %73 = fmul float %71, %50 %74 = fadd float %73, %72 %75 = fmul float %31, %74 %76 = fadd float %75, %71 %77 = fsub float -0.000000e+00, %55 %78 = fcmp oge float %77, 0.000000e+00 %79 = sext i1 %78 to i32 %80 = bitcast i32 %79 to float %81 = bitcast float %80 to i32 %82 = icmp ne i32 %81, 0 %. = select i1 %82, float %76, float %56 %83 = fsub float -0.000000e+00, %64 %84 = fadd float %83, 1.000000e+00 %85 = fsub float -0.000000e+00, %65 %86 = fadd float %85, 1.000000e+00 %87 = fsub float -0.000000e+00, %66 %88 = fadd float %87, 1.000000e+00 %89 = fmul float %30, %84 %90 = fadd float %89, %64 %91 = fmul float %30, %86 %92 = fadd float %91, %65 %93 = fmul float %30, %88 %94 = fadd float %93, %66 %95 = fadd float %67, %28 %96 = call float @llvm.AMDIL.clamp.(float %95, float 0.000000e+00, float 1.000000e+00) %97 = fadd float -1.000000e+00, %24 %98 = fadd float -1.000000e+00, %25 %99 = fadd float -1.000000e+00, %26 %100 = fmul float %96, %97 %101 = fadd float %100, 1.000000e+00 %102 = fmul float %96, %98 %103 = fadd float %102, 1.000000e+00 %104 = fmul float %96, %99 %105 = fadd float %104, 1.000000e+00 %106 = fmul float %90, %101 %107 = fmul float %92, %103 %108 = fmul float %94, %105 %109 = fmul float %106, %43 %110 = fmul float %107, %43 %111 = fmul float %108, %43 %112 = fsub float -0.000000e+00, %43 %113 = fmul float %106, %112 %114 = fadd float %113, %39 %115 = fsub float -0.000000e+00, %43 %116 = fmul float %107, %115 %117 = fadd float %116, %40 %118 = fsub float -0.000000e+00, %43 %119 = fmul float %108, %118 %120 = fadd float %119, %41 %121 = fsub float -0.000000e+00, %51 %122 = fadd float %32, %121 %123 = fsub float -0.000000e+00, %52 %124 = fadd float %33, %123 %125 = fsub float -0.000000e+00, %53 %126 = fadd float %34, %125 %127 = fmul float %122, %122 %128 = fmul float %124, %124 %129 = fadd float %128, %127 %130 = fmul float %126, %126 %131 = fadd float %129, %130 %132 = call float @llvm.AMDGPU.rsq.clamped.f32(float %131) %133 = fmul float %132, %131 %134 = fsub float -0.000000e+00, %131 %135 = call float @llvm.AMDGPU.cndlt(float %134, float %133, float 0.000000e+00) %136 = fmul float %135, %38 %137 = fadd float %136, %36 %138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00) %139 = call float @llvm.minnum.f32(float %138, float %37) %140 = fmul float %139, %139 %141 = fmul float %140, %114 %142 = fadd float %141, %109 %143 = fmul float %140, %117 %144 = fadd float %143, %110 %145 = fmul float %140, %120 %146 = fadd float %145, %111 %147 = call i32 @llvm.SI.packf16(float %142, float %144) %148 = bitcast i32 %147 to float %149 = call i32 @llvm.SI.packf16(float %146, float %.) %150 = bitcast i32 %149 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %148, float %150, float %148, float %150) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #3 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430202 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v6, 1.0, v3 ; 080C06F2 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v6, v3 ; D2820006 040E0C04 s_buffer_load_dword s5, s[0:3], 0x30 ; C2028130 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s5, v5 ; 060E0A05 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v8, -1.0, s5 ; D2060008 00000AF3 v_mad_f32 v8, v7, v8, 1.0 ; D2820008 03CA1107 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 s_buffer_load_dword s5, s[0:3], 0x75 ; C2028175 s_buffer_load_dword s6, s[0:3], 0x78 ; C2030178 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 v_mad_f32 v8, -v6, s6, v8 ; D2820008 24200D06 v_mul_f32_e32 v6, s6, v6 ; 100C0C06 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 s_buffer_load_dword s5, s[0:3], 0x51 ; C2028151 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v9, s5, v9 ; 08121205 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 s_buffer_load_dword s5, s[0:3], 0x50 ; C2028150 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v10, s5, v10 ; 08141405 v_mul_f32_e32 v10, v10, v10 ; 1014150A v_mad_f32 v9, v9, v9, v10 ; D2820009 042A1309 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 s_buffer_load_dword s5, s[0:3], 0x52 ; C2028152 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v10, s5, v10 ; 08141405 v_mad_f32 v9, v10, v10, v9 ; D2820009 0426150A v_rsq_clamp_f32_e32 v10, v9 ; 7E145909 v_mul_f32_e32 v10, v9, v10 ; 10141509 v_xor_b32_e32 v9, 0x80000000, v9 ; 3A1212FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v9, 0, v10, vcc ; D2000809 01AA1480 s_buffer_load_dword s5, s[0:3], 0x54 ; C2028154 s_buffer_load_dword s7, s[0:3], 0x57 ; C2038157 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v10, s5 ; 7E140205 v_mad_f32 v9, s7, v9, v10 ; D2820009 042A1207 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 s_buffer_load_dword s5, s[0:3], 0x56 ; C2028156 s_waitcnt lgkmcnt(0) ; BF8C007F v_min_f32_e32 v9, s5, v9 ; 1E121205 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mad_f32 v6, v9, v8, v6 ; D2820006 041A1109 v_sub_f32_e32 v8, 1.0, v2 ; 081004F2 v_mad_f32 v8, s4, v8, v2 ; D2820008 040A1004 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v10, -1.0, s5 ; D206000A 00000AF3 v_mad_f32 v10, v7, v10, 1.0 ; D282000A 03CA1507 v_mul_f32_e32 v8, v10, v8 ; 1010110A s_buffer_load_dword s5, s[0:3], 0x74 ; C2028174 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v10, s5 ; 7E140205 v_mad_f32 v10, -v8, s6, v10 ; D282000A 24280D08 v_mul_f32_e32 v8, s6, v8 ; 10101006 v_mad_f32 v8, v9, v10, v8 ; D2820008 04221509 v_cvt_pkrtz_f16_f32_e32 v6, v8, v6 ; 5E0C0D08 v_sub_f32_e32 v8, 1.0, v4 ; 081008F2 v_mad_f32 v8, s4, v8, v4 ; D2820008 04121004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v10, -1.0, s4 ; D206000A 000008F3 v_mad_f32 v7, v7, v10, 1.0 ; D2820007 03CA1507 v_mul_f32_e32 v7, v7, v8 ; 100E1107 s_buffer_load_dword s4, s[0:3], 0x76 ; C2020176 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s4 ; 7E100204 v_mad_f32 v8, -v7, s6, v8 ; D2820008 24200D07 v_mul_f32_e32 v7, s6, v7 ; 100E0E06 v_mad_f32 v7, v9, v8, v7 ; D2820007 041E1109 v_add_f32_e32 v2, -1.0, v5 ; 06040AF3 s_buffer_load_dword s4, s[0:3], 0x53 ; C2020153 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v2, 1.0 ; D2820002 03CA0404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v2 ; 10040404 v_interp_p1_f32 v3, v0, 3, 1, [m0] ; C80C0700 v_interp_p2_f32 v3, [v3], v1, 3, 1, [m0] ; C80D0701 v_mad_f32 v3, v2, v3, -v2 ; D2820003 840A0702 s_buffer_load_dword s4, s[0:3], 0x33 ; C2020133 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v3, v2 ; D2820002 040A0604 v_interp_p1_f32 v3, v0, 3, 2, [m0] ; C80C0B00 v_interp_p2_f32 v3, [v3], v1, 3, 2, [m0] ; C80D0B01 s_buffer_load_dword s4, s[0:3], 0x77 ; C2020177 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v3 ; 10000604 s_buffer_load_dword s0, s[0:3], 0x31 ; C2000131 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ge_f32_e64 s[0:1], -|s0|, 0 ; D00C0100 20010000 v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; D2000801 00018280 v_cmp_ne_i32_e64 s[0:1], v1, 0 ; D10A0000 00010101 v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; D2000000 00020500 v_cvt_pkrtz_f16_f32_e32 v0, v7, v0 ; 5E000107 exp 15, 0, 1, 1, 1, v6, v0, v6, v0 ; F8001C0F 00060006 s_endpgm ; BF810000 Installing breakpad exception handler for appid(steam)/version(1430266383) Precache: Took 35631 ms, Vertex 438, Pixel 8010 ConVarRef dota_fow_disable doesn't point to an existing ConVar CClientSteamContext logged on = 1 Could not get IReplayDirector interface from library serverGame.dll loaded for "Dota 2" FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %23, float %24, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_interp_mov_f32 v2, P0, 1, 0, [m0] ; C80A0102 v_interp_mov_f32 v3, P0, 0, 0, [m0] ; C80E0002 exp 15, 0, 0, 1, 1, v3, v2, v1, v0 ; F800180F 00010203 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %23, float %24, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_interp_mov_f32 v2, P0, 1, 0, [m0] ; C80A0102 v_interp_mov_f32 v3, P0, 0, 0, [m0] ; C80E0002 exp 15, 0, 0, 1, 1, v3, v2, v1, v0 ; F800180F 00010203 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL OUT[1], POSITION DCL SAMP[0] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: TEX OUT[1].z, IN[0], SAMP[0], 2D 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 2 call void @llvm.SI.export(i32 1, i32 0, i32 0, i32 8, i32 0, float %35, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 4, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800400 00010002 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt vmcnt(0) ; BF8C0770 exp 1, 8, 0, 0, 0, v0, v1, v1, v1 ; F8000081 01010100 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 0, 0, 1, 1, v1, v1, v1, v0 ; F800180F 00010101 s_endpgm ; BF810000 Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element Fontconfig warning: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 78: saw unknown, expected number FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 3D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %11) %29 = bitcast float %26 to i32 %30 = bitcast float %27 to i32 %31 = bitcast float %28 to i32 %32 = insertelement <4 x i32> undef, i32 %29, i32 0 %33 = insertelement <4 x i32> %32, i32 %30, i32 1 %34 = insertelement <4 x i32> %33, i32 %31, i32 2 %35 = insertelement <4 x i32> %34, i32 undef, i32 3 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 3) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call i32 @llvm.SI.packf16(float %39, float %40) %44 = bitcast i32 %43 to float %45 = call i32 @llvm.SI.packf16(float %41, float %42) %46 = bitcast i32 %45 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %44, float %46, float %44, float %46) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..3] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IMM[0].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %13, %33 %38 = fmul float %14, %33 %39 = fmul float %15, %33 %40 = fmul float %16, %33 %41 = fmul float %17, %34 %42 = fadd float %41, %37 %43 = fmul float %18, %34 %44 = fadd float %43, %38 %45 = fmul float %19, %34 %46 = fadd float %45, %39 %47 = fmul float %20, %34 %48 = fadd float %47, %40 %49 = fmul float %21, %35 %50 = fadd float %49, %42 %51 = fmul float %22, %35 %52 = fadd float %51, %44 %53 = fmul float %23, %35 %54 = fadd float %53, %46 %55 = fmul float %24, %35 %56 = fadd float %55, %48 %57 = fmul float %25, %36 %58 = fadd float %57, %50 %59 = fmul float %26, %36 %60 = fadd float %59, %52 %61 = fmul float %27, %36 %62 = fadd float %61, %54 %63 = fmul float %28, %36 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v1, v4 ; D2820004 04120204 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v2, v4 ; D2820004 04120404 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v3, v4 ; D2820004 04120604 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v3, v5 ; D2820005 04160604 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v0 ; 100C0004 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v3, v6 ; D2820006 041A0604 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v0 ; 100E0004 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v1, v7 ; D2820007 041E0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v2, v7 ; D2820007 041E0404 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v3, v7 ; D2820000 041E0600 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzz 1: TEX TEMP[0], TEMP[0], SAMP[0], 3D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = bitcast float %26 to i32 %30 = bitcast float %27 to i32 %31 = bitcast float %28 to i32 %32 = insertelement <4 x i32> undef, i32 %29, i32 0 %33 = insertelement <4 x i32> %32, i32 %30, i32 1 %34 = insertelement <4 x i32> %33, i32 %31, i32 2 %35 = insertelement <4 x i32> %34, i32 undef, i32 3 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 3) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call i32 @llvm.SI.packf16(float %39, float %40) %44 = bitcast i32 %43 to float %45 = call i32 @llvm.SI.packf16(float %41, float %42) %46 = bitcast i32 %45 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %44, float %46, float %44, float %46) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[9] DCL OUT[3], GENERIC[10] DCL OUT[4], GENERIC[11] DCL CONST[0..70] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 2.2000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: LG2 TEMP[1].x, IN[1].xxxx 2: LG2 TEMP[2].x, IN[1].yyyy 3: MOV TEMP[1].y, TEMP[2].xxxx 4: LG2 TEMP[2].x, IN[1].zzzz 5: MOV TEMP[1].z, TEMP[2].xxxx 6: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].yyyy 7: EX2 TEMP[2].x, TEMP[1].xxxx 8: EX2 TEMP[3].x, TEMP[1].yyyy 9: MOV TEMP[2].y, TEMP[3].xxxx 10: EX2 TEMP[3].x, TEMP[1].zzzz 11: MOV TEMP[2].z, TEMP[3].xxxx 12: DP4 TEMP[0].x, IN[2], CONST[48] 13: DP4 TEMP[3].x, IN[2], CONST[49] 14: MOV TEMP[0].y, TEMP[3].xxxx 15: MAD TEMP[1].xyz, IN[3].xyzz, CONST[66].xxxx, IN[0].xyzz 16: MOV TEMP[1].w, IN[0].wwww 17: DP4 TEMP[3].x, TEMP[1], CONST[67] 18: DP4 TEMP[4].x, TEMP[1], CONST[68] 19: MOV TEMP[3].y, TEMP[4].xxxx 20: DP4 TEMP[1].x, TEMP[1], CONST[69] 21: MOV TEMP[3].z, TEMP[1].xxxx 22: MOV TEMP[3].w, CONST[0].yyyy 23: DP4 TEMP[1].x, TEMP[3], CONST[8] 24: DP4 TEMP[4].x, TEMP[3], CONST[9] 25: MOV TEMP[1].y, TEMP[4].xxxx 26: DP4 TEMP[5].x, TEMP[3], CONST[10] 27: MOV TEMP[1].z, TEMP[5].xxxx 28: DP4 TEMP[6].x, TEMP[3], CONST[11] 29: MOV TEMP[1].w, TEMP[6].xxxx 30: DP4 TEMP[7].x, TEMP[3], CONST[13] 31: MOV TEMP[7].w, TEMP[7].xxxx 32: MOV TEMP[7].xyz, TEMP[3].xyzx 33: MOV TEMP[2].w, IN[1].wwww 34: MOV TEMP[3], TEMP[1] 35: MAD TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz, -TEMP[6].xxxx 36: MOV TEMP[1].z, TEMP[5].xxxx 37: MOV TEMP[1].y, -TEMP[4].xxxx 38: MAD TEMP[1].xy, CONST[70].xyyy, TEMP[6].xxxx, TEMP[1].xyyy 39: MOV OUT[2], TEMP[0] 40: MOV OUT[0], TEMP[1] 41: MOV OUT[1], TEMP[3] 42: MOV OUT[3], TEMP[2] 43: MOV OUT[4], TEMP[7] 44: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1056) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1072) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1076) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1080) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1084) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1088) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1092) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1096) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1100) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1104) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1108) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1112) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1116) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1120) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1124) %58 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0 %60 = add i32 %5, %7 %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %60) %62 = extractelement <4 x float> %61, i32 0 %63 = extractelement <4 x float> %61, i32 1 %64 = extractelement <4 x float> %61, i32 2 %65 = extractelement <4 x float> %61, i32 3 %66 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0 %68 = add i32 %5, %7 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = extractelement <4 x float> %69, i32 3 %74 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0 %76 = add i32 %5, %7 %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %76) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %83 = load <16 x i8> addrspace(2)* %82, !tbaa !0 %84 = add i32 %5, %7 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = call float @llvm.log2.f32(float %70) %90 = call float @llvm.log2.f32(float %71) %91 = call float @llvm.log2.f32(float %72) %92 = fmul float %89, 0x40019999A0000000 %93 = fmul float %90, 0x40019999A0000000 %94 = fmul float %91, 0x40019999A0000000 %95 = call float @llvm.AMDIL.exp.(float %92) %96 = call float @llvm.AMDIL.exp.(float %93) %97 = call float @llvm.AMDIL.exp.(float %94) %98 = fmul float %78, %35 %99 = fmul float %79, %36 %100 = fadd float %98, %99 %101 = fmul float %80, %37 %102 = fadd float %100, %101 %103 = fmul float %81, %38 %104 = fadd float %102, %103 %105 = fmul float %78, %39 %106 = fmul float %79, %40 %107 = fadd float %105, %106 %108 = fmul float %80, %41 %109 = fadd float %107, %108 %110 = fmul float %81, %42 %111 = fadd float %109, %110 %112 = fmul float %86, %43 %113 = fadd float %112, %62 %114 = fmul float %87, %43 %115 = fadd float %114, %63 %116 = fmul float %88, %43 %117 = fadd float %116, %64 %118 = fmul float %113, %44 %119 = fmul float %115, %45 %120 = fadd float %118, %119 %121 = fmul float %117, %46 %122 = fadd float %120, %121 %123 = fmul float %65, %47 %124 = fadd float %122, %123 %125 = fmul float %113, %48 %126 = fmul float %115, %49 %127 = fadd float %125, %126 %128 = fmul float %117, %50 %129 = fadd float %127, %128 %130 = fmul float %65, %51 %131 = fadd float %129, %130 %132 = fmul float %113, %52 %133 = fmul float %115, %53 %134 = fadd float %132, %133 %135 = fmul float %117, %54 %136 = fadd float %134, %135 %137 = fmul float %65, %55 %138 = fadd float %136, %137 %139 = fmul float %124, %15 %140 = fmul float %131, %16 %141 = fadd float %139, %140 %142 = fmul float %138, %17 %143 = fadd float %141, %142 %144 = fmul float %13, %18 %145 = fadd float %143, %144 %146 = fmul float %124, %19 %147 = fmul float %131, %20 %148 = fadd float %146, %147 %149 = fmul float %138, %21 %150 = fadd float %148, %149 %151 = fmul float %13, %22 %152 = fadd float %150, %151 %153 = fmul float %124, %23 %154 = fmul float %131, %24 %155 = fadd float %153, %154 %156 = fmul float %138, %25 %157 = fadd float %155, %156 %158 = fmul float %13, %26 %159 = fadd float %157, %158 %160 = fmul float %124, %27 %161 = fmul float %131, %28 %162 = fadd float %160, %161 %163 = fmul float %138, %29 %164 = fadd float %162, %163 %165 = fmul float %13, %30 %166 = fadd float %164, %165 %167 = fmul float %124, %31 %168 = fmul float %131, %32 %169 = fadd float %167, %168 %170 = fmul float %138, %33 %171 = fadd float %169, %170 %172 = fmul float %13, %34 %173 = fadd float %171, %172 %174 = fsub float -0.000000e+00, %166 %175 = fmul float %159, %14 %176 = fadd float %175, %174 %177 = fsub float -0.000000e+00, %152 %178 = fmul float %56, %166 %179 = fadd float %178, %145 %180 = fmul float %57, %166 %181 = fadd float %180, %177 %182 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 16 %183 = load <16 x i8> addrspace(2)* %182, !tbaa !0 %184 = call float @llvm.SI.load.const(<16 x i8> %183, i32 0) %185 = fmul float %184, %145 %186 = call float @llvm.SI.load.const(<16 x i8> %183, i32 4) %187 = fmul float %186, %152 %188 = fadd float %185, %187 %189 = call float @llvm.SI.load.const(<16 x i8> %183, i32 8) %190 = fmul float %189, %159 %191 = fadd float %188, %190 %192 = call float @llvm.SI.load.const(<16 x i8> %183, i32 12) %193 = fmul float %192, %166 %194 = fadd float %191, %193 %195 = call float @llvm.SI.load.const(<16 x i8> %183, i32 16) %196 = fmul float %195, %145 %197 = call float @llvm.SI.load.const(<16 x i8> %183, i32 20) %198 = fmul float %197, %152 %199 = fadd float %196, %198 %200 = call float @llvm.SI.load.const(<16 x i8> %183, i32 24) %201 = fmul float %200, %159 %202 = fadd float %199, %201 %203 = call float @llvm.SI.load.const(<16 x i8> %183, i32 28) %204 = fmul float %203, %166 %205 = fadd float %202, %204 %206 = call float @llvm.SI.load.const(<16 x i8> %183, i32 32) %207 = fmul float %206, %145 %208 = call float @llvm.SI.load.const(<16 x i8> %183, i32 36) %209 = fmul float %208, %152 %210 = fadd float %207, %209 %211 = call float @llvm.SI.load.const(<16 x i8> %183, i32 40) %212 = fmul float %211, %159 %213 = fadd float %210, %212 %214 = call float @llvm.SI.load.const(<16 x i8> %183, i32 44) %215 = fmul float %214, %166 %216 = fadd float %213, %215 %217 = call float @llvm.SI.load.const(<16 x i8> %183, i32 48) %218 = fmul float %217, %145 %219 = call float @llvm.SI.load.const(<16 x i8> %183, i32 52) %220 = fmul float %219, %152 %221 = fadd float %218, %220 %222 = call float @llvm.SI.load.const(<16 x i8> %183, i32 56) %223 = fmul float %222, %159 %224 = fadd float %221, %223 %225 = call float @llvm.SI.load.const(<16 x i8> %183, i32 60) %226 = fmul float %225, %166 %227 = fadd float %224, %226 %228 = call float @llvm.SI.load.const(<16 x i8> %183, i32 64) %229 = fmul float %228, %145 %230 = call float @llvm.SI.load.const(<16 x i8> %183, i32 68) %231 = fmul float %230, %152 %232 = fadd float %229, %231 %233 = call float @llvm.SI.load.const(<16 x i8> %183, i32 72) %234 = fmul float %233, %159 %235 = fadd float %232, %234 %236 = call float @llvm.SI.load.const(<16 x i8> %183, i32 76) %237 = fmul float %236, %166 %238 = fadd float %235, %237 %239 = call float @llvm.SI.load.const(<16 x i8> %183, i32 80) %240 = fmul float %239, %145 %241 = call float @llvm.SI.load.const(<16 x i8> %183, i32 84) %242 = fmul float %241, %152 %243 = fadd float %240, %242 %244 = call float @llvm.SI.load.const(<16 x i8> %183, i32 88) %245 = fmul float %244, %159 %246 = fadd float %243, %245 %247 = call float @llvm.SI.load.const(<16 x i8> %183, i32 92) %248 = fmul float %247, %166 %249 = fadd float %246, %248 %250 = call float @llvm.SI.load.const(<16 x i8> %183, i32 96) %251 = fmul float %250, %145 %252 = call float @llvm.SI.load.const(<16 x i8> %183, i32 100) %253 = fmul float %252, %152 %254 = fadd float %251, %253 %255 = call float @llvm.SI.load.const(<16 x i8> %183, i32 104) %256 = fmul float %255, %159 %257 = fadd float %254, %256 %258 = call float @llvm.SI.load.const(<16 x i8> %183, i32 108) %259 = fmul float %258, %166 %260 = fadd float %257, %259 %261 = call float @llvm.SI.load.const(<16 x i8> %183, i32 112) %262 = fmul float %261, %145 %263 = call float @llvm.SI.load.const(<16 x i8> %183, i32 116) %264 = fmul float %263, %152 %265 = fadd float %262, %264 %266 = call float @llvm.SI.load.const(<16 x i8> %183, i32 120) %267 = fmul float %266, %159 %268 = fadd float %265, %267 %269 = call float @llvm.SI.load.const(<16 x i8> %183, i32 124) %270 = fmul float %269, %166 %271 = fadd float %268, %270 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %104, float %111, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %95, float %96, float %97, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %124, float %131, float %138, float %173) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %179, float %181, float %176, float %166) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %194, float %205, float %216, float %227) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %238, float %249, float %260, float %271) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[4:7], s[8:9], 0x8 ; C0820908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s0, s[4:7], 0xc5 ; C20005C5 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s0, v2 ; 100A0400 s_buffer_load_dword s0, s[4:7], 0xc4 ; C20005C4 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s0, v5 ; D2820005 04140101 s_buffer_load_dword s0, s[4:7], 0xc6 ; C20005C6 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s0, v5 ; D2820005 04140103 s_buffer_load_dword s0, s[4:7], 0xc7 ; C20005C7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s0, v5 ; D2820005 04140104 s_buffer_load_dword s0, s[4:7], 0xc1 ; C20005C1 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s0, v2 ; 100C0400 s_buffer_load_dword s0, s[4:7], 0xc0 ; C20005C0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s0, v6 ; D2820006 04180101 s_buffer_load_dword s0, s[4:7], 0xc2 ; C20005C2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s0, v6 ; D2820006 04180103 s_buffer_load_dword s0, s[4:7], 0xc3 ; C20005C3 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s0, v6 ; D2820001 04180104 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v1, v5, v2, v2 ; F800020F 02020501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[16:19], 0 idxen ; E00C2000 80040100 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v5, v3 ; 7E0A4F03 v_mul_f32_e32 v5, 0x400ccccd, v5 ; 100A0AFF 400CCCCD v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e32 v6, v2 ; 7E0C4F02 v_mul_f32_e32 v6, 0x400ccccd, v6 ; 100C0CFF 400CCCCD v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_log_f32_e32 v7, v1 ; 7E0E4F01 v_mul_f32_e32 v7, 0x400ccccd, v7 ; 100E0EFF 400CCCCD v_exp_f32_e32 v7, v7 ; 7E0E4B07 exp 15, 33, 0, 0, 0, v7, v6, v5, v4 ; F800021F 04050607 s_movk_i32 s0, 0x420 ; B0000420 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v0, v5, s0, v1 ; D2820000 04040105 v_mad_f32 v9, v6, s0, v2 ; D2820009 04080106 s_movk_i32 s1, 0x434 ; B0010434 s_buffer_load_dword s1, s[4:7], s1 ; C2008401 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s1, v9 ; 10141201 s_movk_i32 s1, 0x430 ; B0010430 s_buffer_load_dword s1, s[4:7], s1 ; C2008401 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v0, s1, v10 ; D282000A 04280300 v_mad_f32 v5, v7, s0, v3 ; D2820005 040C0107 s_movk_i32 s0, 0x438 ; B0000438 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s0, v10 ; D2820006 04280105 s_movk_i32 s0, 0x43c ; B000043C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s0, v6 ; D2820006 04180104 s_movk_i32 s0, 0x444 ; B0000444 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v9 ; 100E1200 s_movk_i32 s0, 0x440 ; B0000440 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s0, v7 ; D2820007 041C0100 s_movk_i32 s0, 0x448 ; B0000448 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v5, s0, v7 ; D2820007 041C0105 s_movk_i32 s0, 0x44c ; B000044C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v4, s0, v7 ; D2820007 041C0104 s_buffer_load_dword s0, s[4:7], 0x35 ; C2000535 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s0, v7 ; 10100E00 s_buffer_load_dword s0, s[4:7], 0x34 ; C2000534 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v6, s0, v8 ; D2820008 04200106 s_movk_i32 s0, 0x454 ; B0000454 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s0, v9 ; 10121200 s_movk_i32 s0, 0x450 ; B0000450 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v9 ; D2820000 04240100 s_movk_i32 s0, 0x458 ; B0000458 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s0, v0 ; D2820000 04000105 s_movk_i32 s0, 0x45c ; B000045C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s0, v0 ; D2820000 04000104 s_buffer_load_dword s0, s[4:7], 0x36 ; C2000536 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s0, v8 ; D2820001 04200100 s_buffer_load_dword s0, s[4:7], 0x37 ; C2000537 s_buffer_load_dword s1, s[4:7], 0x1 ; C2008501 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s0 ; 7E040200 v_mad_f32 v1, v2, s1, v1 ; D2820001 04040302 exp 15, 34, 0, 0, 0, v6, v7, v0, v1 ; F800022F 01000706 s_buffer_load_dword s0, s[4:7], 0x2d ; C200052D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s0, v7 ; 10020E00 s_buffer_load_dword s0, s[4:7], 0x2c ; C200052C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v6, s0, v1 ; D2820001 04040106 s_buffer_load_dword s0, s[4:7], 0x2e ; C200052E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s0, v1 ; D2820001 04040100 s_buffer_load_dword s0, s[4:7], 0x2f ; C200052F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s0 ; 7E040200 v_mad_f32 v1, v2, s1, v1 ; D2820001 04040302 s_buffer_load_dword s0, s[4:7], 0x29 ; C2000529 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s0, v7 ; 10040E00 s_buffer_load_dword s0, s[4:7], 0x28 ; C2000528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v6, s0, v2 ; D2820002 04080106 s_buffer_load_dword s0, s[4:7], 0x2a ; C200052A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s0, v2 ; D2820002 04080100 s_buffer_load_dword s0, s[4:7], 0x2b ; C200052B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s0 ; 7E060200 v_mad_f32 v2, v3, s1, v2 ; D2820002 04080303 s_buffer_load_dword s0, s[4:7], 0x2 ; C2000502 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v2, s0, -v1 ; D2820003 84040102 s_buffer_load_dword s0, s[4:7], 0x25 ; C2000525 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s0, v7 ; 10080E00 s_buffer_load_dword s0, s[4:7], 0x24 ; C2000524 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s0, v4 ; D2820004 04100106 s_buffer_load_dword s0, s[4:7], 0x26 ; C2000526 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s0, v4 ; D2820004 04100100 s_buffer_load_dword s0, s[4:7], 0x27 ; C2000527 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s0 ; 7E0A0200 v_mad_f32 v4, v5, s1, v4 ; D2820004 04100305 s_movk_i32 s0, 0x464 ; B0000464 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s0, v1, -v4 ; D2820005 84120200 s_buffer_load_dword s0, s[4:7], 0x21 ; C2000521 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v7 ; 100E0E00 s_buffer_load_dword s0, s[4:7], 0x20 ; C2000520 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v6, s0, v7 ; D2820006 041C0106 s_buffer_load_dword s0, s[4:7], 0x22 ; C2000522 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v6 ; D2820000 04180100 s_buffer_load_dword s0, s[4:7], 0x23 ; C2000523 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mad_f32 v0, v6, s1, v0 ; D2820000 04000306 s_movk_i32 s0, 0x460 ; B0000460 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s0, v1, v0 ; D2820006 04020200 exp 15, 12, 0, 0, 0, v6, v5, v3, v1 ; F80000CF 01030506 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, v3 ; D2820003 040E0404 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v1, v3 ; D2820003 040E0204 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v4 ; 100C0804 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v0, v6 ; D2820006 041A0004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v4 ; 100E0804 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v0, v7 ; D2820007 041E0004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v2, v7 ; D2820007 041E0404 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v1, v7 ; D2820007 041E0204 exp 15, 13, 0, 0, 0, v7, v6, v5, v3 ; F80000DF 03050607 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, v3 ; D2820003 040E0404 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v1, v3 ; D2820003 040E0204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v4 ; 100C0804 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v0, v6 ; D2820006 041A0004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v0, v4 ; D2820000 04120004 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v2, v0 ; D2820000 04020404 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v1, v0 ; D2820000 04020200 exp 15, 14, 0, 1, 0, v0, v6, v5, v3 ; F80008EF 03050600 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..30] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { -1.0000, 1.0000, 0.0000, 0.0000} 0: ABS TEMP[0].x, CONST[12].yyyy 1: MUL TEMP[1].x, CONST[29].wwww, IN[2].wwww 2: MOV TEMP[2].xy, IN[0].xyyy 3: TEX TEMP[2], TEMP[2], SAMP[0], 2D 4: ADD TEMP[3].x, TEMP[2].wwww, IMM[0].xxxx 5: MAD TEMP[3].x, CONST[20].wwww, TEMP[3].xxxx, IMM[0].yyyy 6: MUL TEMP[3].x, TEMP[3].xxxx, CONST[1].wwww 7: MAD TEMP[4].x, TEMP[3].xxxx, IN[1].wwww, -TEMP[3].xxxx 8: MAD TEMP[3].x, CONST[12].wwww, TEMP[4].xxxx, TEMP[3].xxxx 9: FSGE TEMP[4].x, -TEMP[0].xxxx, IMM[0].zzzz 10: UIF TEMP[4].xxxx :0 11: MOV TEMP[4].x, TEMP[3].xxxx 12: ELSE :0 13: MOV TEMP[4].x, TEMP[1].xxxx 14: ENDIF 15: MOV TEMP[3].w, TEMP[4].xxxx 16: ADD TEMP[0].xyz, -TEMP[2].xyzz, IMM[0].yyyy 17: MAD TEMP[0].xyz, CONST[12].zzzz, TEMP[0].xyzz, TEMP[2].xyzz 18: ADD TEMP[4].x, TEMP[2].wwww, CONST[12].xxxx 19: MOV_SAT TEMP[4].x, TEMP[4].xxxx 20: ADD TEMP[1].xyz, IMM[0].xxxx, CONST[1].xyzz 21: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[1].xyzz, IMM[0].yyyy 22: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xyzz 23: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz 24: MUL TEMP[1].xyz, TEMP[0].xyzz, CONST[30].xxxx 25: MAD TEMP[0].xyz, TEMP[0].xyzz, -CONST[30].xxxx, CONST[29].xyzz 26: ADD TEMP[2].xyz, CONST[20].xyzz, -IN[2].xyzz 27: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 28: RSQ TEMP[4].x, TEMP[2].xxxx 29: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[2].xxxx 30: CMP TEMP[4].x, -TEMP[2].xxxx, TEMP[4].xxxx, IMM[0].zzzz 31: MAD TEMP[2].x, TEMP[4].xxxx, CONST[21].wwww, CONST[21].xxxx 32: MOV_SAT TEMP[2].x, TEMP[2].xxxx 33: MIN TEMP[2].x, TEMP[2].xxxx, CONST[21].zzzz 34: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 35: MAD TEMP[3].xyz, TEMP[2].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 36: MOV OUT[0], TEMP[3] 37: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 332) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 348) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 464) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 468) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 472) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 476) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 480) %44 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %58 = call float @fabs(float %29) %59 = fmul float %42, %57 %60 = bitcast float %48 to i32 %61 = bitcast float %49 to i32 %62 = insertelement <2 x i32> undef, i32 %60, i32 0 %63 = insertelement <2 x i32> %62, i32 %61, i32 1 %64 = bitcast <8 x i32> %45 to <32 x i8> %65 = bitcast <4 x i32> %47 to <16 x i8> %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = fadd float %70, -1.000000e+00 %72 = fmul float %35, %71 %73 = fadd float %72, 1.000000e+00 %74 = fmul float %73, %27 %75 = fsub float -0.000000e+00, %74 %76 = fmul float %74, %53 %77 = fadd float %76, %75 %78 = fmul float %31, %77 %79 = fadd float %78, %74 %80 = fsub float -0.000000e+00, %58 %81 = fcmp oge float %80, 0.000000e+00 %82 = sext i1 %81 to i32 %83 = bitcast i32 %82 to float %84 = bitcast float %83 to i32 %85 = icmp ne i32 %84, 0 %. = select i1 %85, float %79, float %59 %86 = fsub float -0.000000e+00, %67 %87 = fadd float %86, 1.000000e+00 %88 = fsub float -0.000000e+00, %68 %89 = fadd float %88, 1.000000e+00 %90 = fsub float -0.000000e+00, %69 %91 = fadd float %90, 1.000000e+00 %92 = fmul float %30, %87 %93 = fadd float %92, %67 %94 = fmul float %30, %89 %95 = fadd float %94, %68 %96 = fmul float %30, %91 %97 = fadd float %96, %69 %98 = fadd float %70, %28 %99 = call float @llvm.AMDIL.clamp.(float %98, float 0.000000e+00, float 1.000000e+00) %100 = fadd float -1.000000e+00, %24 %101 = fadd float -1.000000e+00, %25 %102 = fadd float -1.000000e+00, %26 %103 = fmul float %99, %100 %104 = fadd float %103, 1.000000e+00 %105 = fmul float %99, %101 %106 = fadd float %105, 1.000000e+00 %107 = fmul float %99, %102 %108 = fadd float %107, 1.000000e+00 %109 = fmul float %104, %50 %110 = fmul float %106, %51 %111 = fmul float %108, %52 %112 = fmul float %93, %109 %113 = fmul float %95, %110 %114 = fmul float %97, %111 %115 = fmul float %112, %43 %116 = fmul float %113, %43 %117 = fmul float %114, %43 %118 = fsub float -0.000000e+00, %43 %119 = fmul float %112, %118 %120 = fadd float %119, %39 %121 = fsub float -0.000000e+00, %43 %122 = fmul float %113, %121 %123 = fadd float %122, %40 %124 = fsub float -0.000000e+00, %43 %125 = fmul float %114, %124 %126 = fadd float %125, %41 %127 = fsub float -0.000000e+00, %54 %128 = fadd float %32, %127 %129 = fsub float -0.000000e+00, %55 %130 = fadd float %33, %129 %131 = fsub float -0.000000e+00, %56 %132 = fadd float %34, %131 %133 = fmul float %128, %128 %134 = fmul float %130, %130 %135 = fadd float %134, %133 %136 = fmul float %132, %132 %137 = fadd float %135, %136 %138 = call float @llvm.AMDGPU.rsq.clamped.f32(float %137) %139 = fmul float %138, %137 %140 = fsub float -0.000000e+00, %137 %141 = call float @llvm.AMDGPU.cndlt(float %140, float %139, float 0.000000e+00) %142 = fmul float %141, %38 %143 = fadd float %142, %36 %144 = call float @llvm.AMDIL.clamp.(float %143, float 0.000000e+00, float 1.000000e+00) %145 = call float @llvm.minnum.f32(float %144, float %37) %146 = fmul float %145, %145 %147 = fmul float %146, %120 %148 = fadd float %147, %115 %149 = fmul float %146, %123 %150 = fadd float %149, %116 %151 = fmul float %146, %126 %152 = fadd float %151, %117 %153 = call i32 @llvm.SI.packf16(float %148, float %150) %154 = bitcast i32 %153 to float %155 = call i32 @llvm.SI.packf16(float %152, float %.) %156 = bitcast i32 %155 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %154, float %156, float %154, float %156) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #3 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430202 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v6, 1.0, v3 ; 080C06F2 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v6, v3 ; D2820006 040E0C04 s_buffer_load_dword s5, s[0:3], 0x30 ; C2028130 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s5, v5 ; 060E0A05 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v8, -1.0, s5 ; D2060008 00000AF3 v_mad_f32 v8, v7, v8, 1.0 ; D2820008 03CA1107 v_interp_p1_f32 v9, v0, 1, 1, [m0] ; C8240500 v_interp_p2_f32 v9, [v9], v1, 1, 1, [m0] ; C8250501 v_mul_f32_e32 v8, v9, v8 ; 10101109 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 s_buffer_load_dword s5, s[0:3], 0x75 ; C2028175 s_buffer_load_dword s6, s[0:3], 0x78 ; C2030178 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 v_mad_f32 v8, -v6, s6, v8 ; D2820008 24200D06 v_mul_f32_e32 v6, s6, v6 ; 100C0C06 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 s_buffer_load_dword s5, s[0:3], 0x51 ; C2028151 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v9, s5, v9 ; 08121205 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 s_buffer_load_dword s5, s[0:3], 0x50 ; C2028150 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v10, s5, v10 ; 08141405 v_mul_f32_e32 v10, v10, v10 ; 1014150A v_mad_f32 v9, v9, v9, v10 ; D2820009 042A1309 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 s_buffer_load_dword s5, s[0:3], 0x52 ; C2028152 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v10, s5, v10 ; 08141405 v_mad_f32 v9, v10, v10, v9 ; D2820009 0426150A v_rsq_clamp_f32_e32 v10, v9 ; 7E145909 v_mul_f32_e32 v10, v9, v10 ; 10141509 v_xor_b32_e32 v9, 0x80000000, v9 ; 3A1212FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v9, 0, v10, vcc ; D2000809 01AA1480 s_buffer_load_dword s5, s[0:3], 0x54 ; C2028154 s_buffer_load_dword s7, s[0:3], 0x57 ; C2038157 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v10, s5 ; 7E140205 v_mad_f32 v9, s7, v9, v10 ; D2820009 042A1207 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 s_buffer_load_dword s5, s[0:3], 0x56 ; C2028156 s_waitcnt lgkmcnt(0) ; BF8C007F v_min_f32_e32 v9, s5, v9 ; 1E121205 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mad_f32 v6, v9, v8, v6 ; D2820006 041A1109 v_sub_f32_e32 v8, 1.0, v2 ; 081004F2 v_mad_f32 v8, s4, v8, v2 ; D2820008 040A1004 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v10, -1.0, s5 ; D206000A 00000AF3 v_mad_f32 v10, v7, v10, 1.0 ; D282000A 03CA1507 v_interp_p1_f32 v11, v0, 0, 1, [m0] ; C82C0400 v_interp_p2_f32 v11, [v11], v1, 0, 1, [m0] ; C82D0401 v_mul_f32_e32 v10, v11, v10 ; 1014150B v_mul_f32_e32 v8, v10, v8 ; 1010110A s_buffer_load_dword s5, s[0:3], 0x74 ; C2028174 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v10, s5 ; 7E140205 v_mad_f32 v10, -v8, s6, v10 ; D282000A 24280D08 v_mul_f32_e32 v8, s6, v8 ; 10101006 v_mad_f32 v8, v9, v10, v8 ; D2820008 04221509 v_cvt_pkrtz_f16_f32_e32 v6, v8, v6 ; 5E0C0D08 v_sub_f32_e32 v8, 1.0, v4 ; 081008F2 v_mad_f32 v8, s4, v8, v4 ; D2820008 04121004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v10, -1.0, s4 ; D206000A 000008F3 v_mad_f32 v7, v7, v10, 1.0 ; D2820007 03CA1507 v_interp_p1_f32 v10, v0, 2, 1, [m0] ; C8280600 v_interp_p2_f32 v10, [v10], v1, 2, 1, [m0] ; C8290601 v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_mul_f32_e32 v7, v7, v8 ; 100E1107 s_buffer_load_dword s4, s[0:3], 0x76 ; C2020176 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s4 ; 7E100204 v_mad_f32 v8, -v7, s6, v8 ; D2820008 24200D07 v_mul_f32_e32 v7, s6, v7 ; 100E0E06 v_mad_f32 v7, v9, v8, v7 ; D2820007 041E1109 v_add_f32_e32 v2, -1.0, v5 ; 06040AF3 s_buffer_load_dword s4, s[0:3], 0x53 ; C2020153 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v2, 1.0 ; D2820002 03CA0404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v2 ; 10040404 v_interp_p1_f32 v3, v0, 3, 1, [m0] ; C80C0700 v_interp_p2_f32 v3, [v3], v1, 3, 1, [m0] ; C80D0701 v_mad_f32 v3, v2, v3, -v2 ; D2820003 840A0702 s_buffer_load_dword s4, s[0:3], 0x33 ; C2020133 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v3, v2 ; D2820002 040A0604 v_interp_p1_f32 v3, v0, 3, 2, [m0] ; C80C0B00 v_interp_p2_f32 v3, [v3], v1, 3, 2, [m0] ; C80D0B01 s_buffer_load_dword s4, s[0:3], 0x77 ; C2020177 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v3 ; 10000604 s_buffer_load_dword s0, s[0:3], 0x31 ; C2000131 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ge_f32_e64 s[0:1], -|s0|, 0 ; D00C0100 20010000 v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; D2000801 00018280 v_cmp_ne_i32_e64 s[0:1], v1, 0 ; D10A0000 00010101 v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; D2000000 00020500 v_cvt_pkrtz_f16_f32_e32 v0, v7, v0 ; 5E000107 exp 15, 0, 1, 1, 1, v6, v0, v6, v0 ; F8001C0F 00060006 s_endpgm ; BF810000 [0428/202613:ERROR:renderer_main.cc(212)] Running without renderer sandbox VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[0] 2: DP4 TEMP[1].x, IN[1], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[2] 5: DP4 TEMP[2].x, IN[1], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[0], TEMP[0] 10: MOV OUT[2], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %13 %46 = fmul float %42, %14 %47 = fadd float %45, %46 %48 = fmul float %43, %15 %49 = fadd float %47, %48 %50 = fmul float %44, %16 %51 = fadd float %49, %50 %52 = fmul float %41, %17 %53 = fmul float %42, %18 %54 = fadd float %52, %53 %55 = fmul float %43, %19 %56 = fadd float %54, %55 %57 = fmul float %44, %20 %58 = fadd float %56, %57 %59 = fmul float %41, %21 %60 = fmul float %42, %22 %61 = fadd float %59, %60 %62 = fmul float %43, %23 %63 = fadd float %61, %62 %64 = fmul float %44, %24 %65 = fadd float %63, %64 %66 = fmul float %41, %25 %67 = fmul float %42, %26 %68 = fadd float %66, %67 %69 = fmul float %43, %27 %70 = fadd float %68, %69 %71 = fmul float %44, %28 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 33, 0, 0, 0, v5, v4, v6, v6 ; F800021F 06060405 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = bitcast <8 x i32> %23 to <32 x i8> %34 = bitcast <4 x i32> %25 to <16 x i8> %35 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %33, <16 x i8> %34, i32 2) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = extractelement <4 x float> %35, i32 3 %40 = fmul float %39, %26 %41 = call i32 @llvm.SI.packf16(float %36, float %37) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %38, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v6, v2, v3 ; 5E0C0702 v_interp_p1_f32 v7, v0, 3, 0, [m0] ; C81C0300 v_interp_p2_f32 v7, [v7], v1, 3, 0, [m0] ; C81D0301 v_mul_f32_e32 v0, v7, v5 ; 10000B07 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v6, v0, v6, v0 ; F8001C0F 00060006 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[0] 2: DP4 TEMP[1].x, IN[1], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[2] 5: DP4 TEMP[2].x, IN[1], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[0], TEMP[0] 10: MOV OUT[2], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %13 %46 = fmul float %42, %14 %47 = fadd float %45, %46 %48 = fmul float %43, %15 %49 = fadd float %47, %48 %50 = fmul float %44, %16 %51 = fadd float %49, %50 %52 = fmul float %41, %17 %53 = fmul float %42, %18 %54 = fadd float %52, %53 %55 = fmul float %43, %19 %56 = fadd float %54, %55 %57 = fmul float %44, %20 %58 = fadd float %56, %57 %59 = fmul float %41, %21 %60 = fmul float %42, %22 %61 = fadd float %59, %60 %62 = fmul float %43, %23 %63 = fadd float %61, %62 %64 = fmul float %44, %24 %65 = fadd float %63, %64 %66 = fmul float %41, %25 %67 = fmul float %42, %26 %68 = fadd float %66, %67 %69 = fmul float %43, %27 %70 = fadd float %68, %69 %71 = fmul float %44, %28 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 33, 0, 0, 0, v5, v4, v6, v6 ; F800021F 06060405 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { -0.5020, 1.5960, -0.8130, 0.0000} IMM[1] FLT32 { -0.0627, 1.1640, 0.0000, -0.3920} IMM[2] FLT32 { 0.0000, -0.3920, 2.0170, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[2], 2D 2: ADD TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 5: ADD TEMP[1].x, TEMP[1].wwww, IMM[1].xxxx 6: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 7: MAD TEMP[0], TEMP[0].xxxx, IMM[0].yzww, TEMP[1].xxxx 8: MOV TEMP[1].xy, IN[1].xyyy 9: TEX TEMP[1].w, TEMP[1], SAMP[1], 2D 10: ADD TEMP[1].x, TEMP[1].wwww, IMM[0].xxxx 11: MAD TEMP[0].xyz, TEMP[1].xxxx, IMM[2].xyzx, TEMP[0] 12: MOV TEMP[1].xy, IN[1].xyyy 13: TEX TEMP[1].w, TEMP[1], SAMP[3], 2D 14: MOV TEMP[0].xyz, TEMP[0].xyzx 15: MUL TEMP[1].x, TEMP[1].wwww, IN[0].wwww 16: MOV TEMP[0].w, TEMP[1].xxxx 17: MOV OUT[0], TEMP[0] 18: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %41 = bitcast float %39 to i32 %42 = bitcast float %40 to i32 %43 = insertelement <2 x i32> undef, i32 %41, i32 0 %44 = insertelement <2 x i32> %43, i32 %42, i32 1 %45 = bitcast <8 x i32> %31 to <32 x i8> %46 = bitcast <4 x i32> %33 to <16 x i8> %47 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %45, <16 x i8> %46, i32 2) %48 = extractelement <4 x float> %47, i32 3 %49 = fadd float %48, 0xBFE0101020000000 %50 = bitcast float %39 to i32 %51 = bitcast float %40 to i32 %52 = insertelement <2 x i32> undef, i32 %50, i32 0 %53 = insertelement <2 x i32> %52, i32 %51, i32 1 %54 = bitcast <8 x i32> %23 to <32 x i8> %55 = bitcast <4 x i32> %25 to <16 x i8> %56 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %54, <16 x i8> %55, i32 2) %57 = extractelement <4 x float> %56, i32 3 %58 = fadd float %57, 0xBFB0101020000000 %59 = fmul float %58, 0x3FF29FBE80000000 %60 = fmul float %49, 0x3FF9893740000000 %61 = fadd float %60, %59 %62 = fmul float %49, 0xBFEA0418A0000000 %63 = fadd float %62, %59 %64 = fmul float %49, 0.000000e+00 %65 = fadd float %64, %59 %66 = bitcast float %39 to i32 %67 = bitcast float %40 to i32 %68 = insertelement <2 x i32> undef, i32 %66, i32 0 %69 = insertelement <2 x i32> %68, i32 %67, i32 1 %70 = bitcast <8 x i32> %27 to <32 x i8> %71 = bitcast <4 x i32> %29 to <16 x i8> %72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %70, <16 x i8> %71, i32 2) %73 = extractelement <4 x float> %72, i32 3 %74 = fadd float %73, 0xBFE0101020000000 %75 = fmul float %74, 0.000000e+00 %76 = fadd float %75, %61 %77 = fmul float %74, 0xBFD9168720000000 %78 = fadd float %77, %63 %79 = fmul float %74, 0x400022D0E0000000 %80 = fadd float %79, %65 %81 = bitcast float %39 to i32 %82 = bitcast float %40 to i32 %83 = insertelement <2 x i32> undef, i32 %81, i32 0 %84 = insertelement <2 x i32> %83, i32 %82, i32 1 %85 = bitcast <8 x i32> %35 to <32 x i8> %86 = bitcast <4 x i32> %37 to <16 x i8> %87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %84, <32 x i8> %85, <16 x i8> %86, i32 2) %88 = extractelement <4 x float> %87, i32 3 %89 = fmul float %88, %38 %90 = call i32 @llvm.SI.packf16(float %76, float %78) %91 = bitcast i32 %90 to float %92 = call i32 @llvm.SI.packf16(float %80, float %89) %93 = bitcast i32 %92 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %91, float %93, float %91, float %93) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[44:51], s[6:7], 0x18 ; C0D60718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v4, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[12:15] ; F0800800 00690402 v_mov_b32_e32 v5, 0xbf008081 ; 7E0A02FF BF008081 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v5, v4 ; 06080905 image_sample v6, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[0:3] ; F0800800 00050602 v_mov_b32_e32 v7, 0xbd808081 ; 7E0E02FF BD808081 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v6, v7 ; 060C0F06 v_mul_f32_e32 v6, 0x3f94fdf4, v6 ; 100C0CFF 3F94FDF4 v_mov_b32_e32 v7, 0xbf5020c5 ; 7E0E02FF BF5020C5 v_mad_f32 v7, v7, v4, v6 ; D2820007 041A0907 image_sample v8, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[8:11] ; F0800800 00470802 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v5, v5, v8 ; 060A1105 v_mov_b32_e32 v8, 0xbec8b439 ; 7E1002FF BEC8B439 v_mad_f32 v7, v8, v5, v7 ; D2820007 041E0B08 v_mov_b32_e32 v8, 0x3fcc49ba ; 7E1002FF 3FCC49BA v_mad_f32 v8, v8, v4, v6 ; D2820008 041A0908 v_mad_f32 v8, 0, v5, v8 ; D2820008 04220A80 v_cvt_pkrtz_f16_f32_e32 v7, v8, v7 ; 5E0E0F08 v_mad_f32 v4, 0, v4, v6 ; D2820004 041A0880 v_mov_b32_e32 v6, 0x40011687 ; 7E0C02FF 40011687 v_mad_f32 v4, v6, v5, v4 ; D2820004 04120B06 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[44:51], s[16:19] ; F0800800 008B0202 v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300 v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v3, v2 ; 10000503 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v7, v0, v7, v0 ; F8001C0F 00070007 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[2] 2: DP4 TEMP[1].x, IN[1], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV TEMP[1].xy, IN[0].xyxx 5: MOV OUT[1], CONST[0] 6: MOV OUT[2], CONST[1] 7: MOV OUT[0], TEMP[0] 8: MOV OUT[3], TEMP[1] 9: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %36 = load <16 x i8> addrspace(2)* %35, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = fmul float %39, %21 %44 = fmul float %40, %22 %45 = fadd float %43, %44 %46 = fmul float %41, %23 %47 = fadd float %45, %46 %48 = fmul float %42, %24 %49 = fadd float %47, %48 %50 = fmul float %39, %25 %51 = fmul float %40, %26 %52 = fadd float %50, %51 %53 = fmul float %41, %27 %54 = fadd float %52, %53 %55 = fmul float %42, %28 %56 = fadd float %54, %55 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %33, float %34, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %49, float %56, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 34, 0, 0, 0, v1, v2, v5, v5 ; F800022F 05050201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v6 ; D2820000 04180103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v5, v1 ; F80008CF 01050400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[2].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[1], IN[0] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %36 = bitcast float %34 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %23 to <32 x i8> %41 = bitcast <4 x i32> %25 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = fmul float %43, %30 %48 = fadd float %47, %26 %49 = fmul float %44, %31 %50 = fadd float %49, %27 %51 = fmul float %45, %32 %52 = fadd float %51, %28 %53 = fmul float %46, %33 %54 = fadd float %53, %29 %55 = call i32 @llvm.SI.packf16(float %48, float %50) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %52, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 2, [m0] ; C80C0900 v_interp_p2_f32 v3, [v3], v1, 1, 2, [m0] ; C80D0901 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 v_interp_p1_f32 v6, v0, 3, 0, [m0] ; C8180300 v_interp_p2_f32 v6, [v6], v1, 3, 0, [m0] ; C8190301 v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700 v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v5, v7, v6 ; D2820006 041A0F05 v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200 v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_mad_f32 v7, v4, v8, v7 ; D2820007 041E1104 v_cvt_pkrtz_f16_f32_e32 v6, v7, v6 ; 5E0C0D07 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_interp_p1_f32 v8, v0, 1, 1, [m0] ; C8200500 v_interp_p2_f32 v8, [v8], v1, 1, 1, [m0] ; C8210501 v_mad_f32 v7, v3, v8, v7 ; D2820007 041E1103 v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_mad_f32 v0, v2, v9, v8 ; D2820000 04221302 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..95] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 4.0000, 2.1000} IMM[1] FLT32 { 3.1000, 0.1000, 1.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IN[2].xxxx, IMM[0].zzzz 2: ADD TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: ADD TEMP[2].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ADD TEMP[2].x, TEMP[1].xxxx, IMM[1].yyyy 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: MOV TEMP[2], CONST[ADDR[0].x] 16: ADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 17: F2I TEMP[1].x, TEMP[1].xxxx 18: UARL ADDR[0].x, TEMP[1].xxxx 19: MOV TEMP[1], CONST[ADDR[0].x] 20: MOV TEMP[3].xy, IN[0].xyxx 21: MOV OUT[1], TEMP[2] 22: MOV OUT[2], TEMP[1] 23: MOV OUT[0], TEMP[0] 24: MOV OUT[3], TEMP[3] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 %27 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = fmul float %31, 4.000000e+00 %33 = fadd float 0x4000CCCCC0000000, %32 %34 = fptosi float %33 to i32 %35 = bitcast i32 %34 to float %36 = bitcast float %35 to i32 %37 = shl i32 %36, 4 %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %37) %39 = shl i32 %36, 4 %40 = add i32 %39, 4 %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %40) %42 = shl i32 %36, 4 %43 = add i32 %42, 8 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = shl i32 %36, 4 %46 = add i32 %45, 12 %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %46) %48 = fmul float %23, %38 %49 = fmul float %24, %41 %50 = fadd float %48, %49 %51 = fmul float %25, %44 %52 = fadd float %50, %51 %53 = fmul float %26, %47 %54 = fadd float %52, %53 %55 = fadd float 0x4008CCCCC0000000, %32 %56 = fptosi float %55 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = shl i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = shl i32 %58, 4 %62 = add i32 %61, 4 %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %62) %64 = shl i32 %58, 4 %65 = add i32 %64, 8 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = shl i32 %58, 4 %68 = add i32 %67, 12 %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %68) %70 = fmul float %23, %60 %71 = fmul float %24, %63 %72 = fadd float %70, %71 %73 = fmul float %25, %66 %74 = fadd float %72, %73 %75 = fmul float %26, %69 %76 = fadd float %74, %75 %77 = fadd float %32, 0x3FB99999A0000000 %78 = fptosi float %77 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = shl i32 %80, 4 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %80, 4 %84 = add i32 %83, 4 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = shl i32 %80, 4 %87 = add i32 %86, 8 %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %87) %89 = shl i32 %80, 4 %90 = add i32 %89, 12 %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %90) %92 = fadd float 0x3FF19999A0000000, %32 %93 = fptosi float %92 to i32 %94 = bitcast i32 %93 to float %95 = bitcast float %94 to i32 %96 = shl i32 %95, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %96) %98 = shl i32 %95, 4 %99 = add i32 %98, 4 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = shl i32 %95, 4 %102 = add i32 %101, 8 %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %102) %104 = shl i32 %95, 4 %105 = add i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %82, float %85, float %88, float %91) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %97, float %100, float %103, float %106) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %54, float %76, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 0x3dcccccd ; 7E0A02FF 3DCCCCCD s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, 4.0, v1, v5 ; D2820005 041602F6 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v6, v5, s[0:3], 0 offen ; E0301000 80000605 v_or_b32_e32 v7, 12, v5 ; 380E0A8C buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 v_or_b32_e32 v8, 8, v5 ; 38100A88 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_or_b32_e32 v5, 4, v5 ; 380A0A84 buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v6, v5, v8, v7 ; F800020F 07080506 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v5, 0x3f8ccccd ; 7E0A02FF 3F8CCCCD v_mad_f32 v5, 4.0, v1, v5 ; D2820005 041602F6 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 buffer_load_dword v6, v5, s[0:3], 0 offen ; E0301000 80000605 v_or_b32_e32 v7, 12, v5 ; 380E0A8C buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 v_or_b32_e32 v8, 8, v5 ; 38100A88 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_or_b32_e32 v5, 4, v5 ; 380A0A84 buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v6, v5, v8, v7 ; F800021F 07080506 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 v_mov_b32_e32 v9, 0 ; 7E120280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 34, 0, 0, 0, v5, v6, v9, v9 ; F800022F 09090605 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v5, 0x40466666 ; 7E0A02FF 40466666 v_mad_f32 v5, 4.0, v1, v5 ; D2820005 041602F6 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_or_b32_e32 v6, 4, v5 ; 380C0A84 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 buffer_load_format_xyzw v[10:13], v0, s[12:15], 0 idxen ; E00C2000 80030A00 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v11 ; 10001706 buffer_load_dword v6, v5, s[0:3], 0 offen ; E0301000 80000605 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v10, v6, v0 ; D2820000 04020D0A v_or_b32_e32 v6, 8, v5 ; 380C0A88 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v12, v6, v0 ; D2820000 04020D0C v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v13, v5, v0 ; D2820000 04020B0D v_mov_b32_e32 v5, 0x40066666 ; 7E0A02FF 40066666 v_mad_f32 v1, 4.0, v1, v5 ; D2820001 041602F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v2, 4, v1 ; 38040284 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v11 ; 10041702 buffer_load_dword v3, v1, s[0:3], 0 offen ; E0301000 80000301 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v10, v3, v2 ; D2820002 040A070A v_or_b32_e32 v3, 8, v1 ; 38060288 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v12, v3, v2 ; D2820002 040A070C v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v13, v1, v2 ; D2820001 040A030D v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v1, v0, v9, v2 ; F80008CF 02090001 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[2].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[1], IN[0] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %36 = bitcast float %34 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %23 to <32 x i8> %41 = bitcast <4 x i32> %25 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = fmul float %43, %30 %48 = fadd float %47, %26 %49 = fmul float %44, %31 %50 = fadd float %49, %27 %51 = fmul float %45, %32 %52 = fadd float %51, %28 %53 = fmul float %46, %33 %54 = fadd float %53, %29 %55 = call i32 @llvm.SI.packf16(float %48, float %50) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %52, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 2, [m0] ; C80C0900 v_interp_p2_f32 v3, [v3], v1, 1, 2, [m0] ; C80D0901 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 v_interp_p1_f32 v6, v0, 3, 0, [m0] ; C8180300 v_interp_p2_f32 v6, [v6], v1, 3, 0, [m0] ; C8190301 v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700 v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v5, v7, v6 ; D2820006 041A0F05 v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200 v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_mad_f32 v7, v4, v8, v7 ; D2820007 041E1104 v_cvt_pkrtz_f16_f32_e32 v6, v7, v6 ; 5E0C0D07 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_interp_p1_f32 v8, v0, 1, 1, [m0] ; C8200500 v_interp_p2_f32 v8, [v8], v1, 1, 1, [m0] ; C8210501 v_mad_f32 v7, v3, v8, v7 ; D2820007 041E1103 v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_mad_f32 v0, v2, v9, v8 ; D2820000 04221302 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[2], IN[1] 5: MOV OUT[3], CONST[0] 6: MOV OUT[1], IN[0] 7: MOV OUT[4], CONST[1] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %21 %54 = fmul float %50, %22 %55 = fadd float %53, %54 %56 = fmul float %51, %23 %57 = fadd float %55, %56 %58 = fmul float %52, %24 %59 = fadd float %57, %58 %60 = fmul float %49, %25 %61 = fmul float %50, %26 %62 = fadd float %60, %61 %63 = fmul float %51, %27 %64 = fadd float %62, %63 %65 = fmul float %52, %28 %66 = fadd float %64, %65 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 35, 0, 0, 0, v1, v2, v3, v4 ; F800023F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v4, v2, v1 ; F80008CF 01020400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %35 = fmul float %22, %31 %36 = fadd float %35, %27 %37 = fmul float %23, %32 %38 = fadd float %37, %28 %39 = fmul float %24, %33 %40 = fadd float %39, %29 %41 = fmul float %25, %34 %42 = fadd float %41, %30 %43 = fmul float %42, %26 %44 = call i32 @llvm.SI.packf16(float %36, float %38) %45 = bitcast i32 %44 to float %46 = call i32 @llvm.SI.packf16(float %40, float %43) %47 = bitcast i32 %46 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %45, float %47, float %45, float %47) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 2, [m0] ; C8080900 v_interp_p2_f32 v2, [v2], v1, 1, 2, [m0] ; C8090901 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_mad_f32 v2, v4, v3, v2 ; D2820002 040A0704 v_interp_p1_f32 v3, v0, 0, 2, [m0] ; C80C0800 v_interp_p2_f32 v3, [v3], v1, 0, 2, [m0] ; C80D0801 v_interp_p1_f32 v4, v0, 0, 3, [m0] ; C8100C00 v_interp_p2_f32 v4, [v4], v1, 0, 3, [m0] ; C8110C01 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 v_mad_f32 v3, v5, v4, v3 ; D2820003 040E0905 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v3, v0, 2, 2, [m0] ; C80C0A00 v_interp_p2_f32 v3, [v3], v1, 2, 2, [m0] ; C80D0A01 v_interp_p1_f32 v4, v0, 2, 3, [m0] ; C8100E00 v_interp_p2_f32 v4, [v4], v1, 2, 3, [m0] ; C8110E01 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_mad_f32 v3, v5, v4, v3 ; D2820003 040E0905 v_interp_p1_f32 v4, v0, 3, 2, [m0] ; C8100B00 v_interp_p2_f32 v4, [v4], v1, 3, 2, [m0] ; C8110B01 v_interp_p1_f32 v5, v0, 3, 3, [m0] ; C8140F00 v_interp_p2_f32 v5, [v5], v1, 3, 3, [m0] ; C8150F01 v_interp_p1_f32 v6, v0, 3, 0, [m0] ; C8180300 v_interp_p2_f32 v6, [v6], v1, 3, 0, [m0] ; C8190301 v_mad_f32 v4, v6, v5, v4 ; D2820004 04120B06 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 v_mul_f32_e32 v0, v5, v4 ; 10000905 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IN[1].xxxx, IMM[0].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[0], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[0], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = fmul float %25, 2.000000e+00 %27 = fadd float %26, 0x3FB99999A0000000 %28 = fptosi float %27 to i32 %29 = bitcast i32 %28 to float %30 = bitcast float %29 to i32 %31 = shl i32 %30, 4 %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %31) %33 = shl i32 %30, 4 %34 = add i32 %33, 4 %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %34) %36 = shl i32 %30, 4 %37 = add i32 %36, 8 %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %37) %39 = shl i32 %30, 4 %40 = add i32 %39, 12 %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %40) %42 = fmul float %17, %32 %43 = fmul float %18, %35 %44 = fadd float %42, %43 %45 = fmul float %19, %38 %46 = fadd float %44, %45 %47 = fmul float %20, %41 %48 = fadd float %46, %47 %49 = fadd float 0x3FF19999A0000000, %26 %50 = fptosi float %49 to i32 %51 = bitcast i32 %50 to float %52 = bitcast float %51 to i32 %53 = shl i32 %52, 4 %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %53) %55 = shl i32 %52, 4 %56 = add i32 %55, 4 %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %56) %58 = shl i32 %52, 4 %59 = add i32 %58, 8 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = shl i32 %52, 4 %62 = add i32 %61, 12 %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %62) %64 = fmul float %17, %54 %65 = fmul float %18, %57 %66 = fadd float %64, %65 %67 = fmul float %19, %60 %68 = fadd float %66, %67 %69 = fmul float %20, %63 %70 = fadd float %68, %69 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %48, float %70, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 0x3f8ccccd ; 7E0A02FF 3F8CCCCD s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, 2.0, v1, v5 ; D2820005 041602F4 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_or_b32_e32 v6, 4, v5 ; 380C0A84 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 buffer_load_format_xyzw v[7:10], v0, s[4:7], 0 idxen ; E00C2000 80010700 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v8 ; 10001106 buffer_load_dword v6, v5, s[0:3], 0 offen ; E0301000 80000605 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v7, v6, v0 ; D2820000 04020D07 v_or_b32_e32 v6, 8, v5 ; 380C0A88 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v9, v6, v0 ; D2820000 04020D09 v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v10, v5, v0 ; D2820000 04020B0A v_mov_b32_e32 v5, 0x3dcccccd ; 7E0A02FF 3DCCCCCD v_mad_f32 v1, 2.0, v1, v5 ; D2820001 041602F4 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v2, 4, v1 ; 38040284 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v8 ; 10041102 buffer_load_dword v3, v1, s[0:3], 0 offen ; E0301000 80000301 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v7, v3, v2 ; D2820002 040A0707 v_or_b32_e32 v3, 8, v1 ; 38060288 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v9, v3, v2 ; D2820002 040A0709 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v10, v1, v2 ; D2820001 040A030A v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v1, v0, v3, v2 ; F80008CF 02030001 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s4 ; 7E000204 v_cvt_pkrtz_f16_f32_e32 v0, s5, v0 ; 5E000005 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_cvt_pkrtz_f16_f32_e32 v1, s0, v1 ; 5E020200 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[0] 2: DP4 TEMP[1].x, IN[0], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = fmul float %25, %13 %30 = fmul float %26, %14 %31 = fadd float %29, %30 %32 = fmul float %27, %15 %33 = fadd float %31, %32 %34 = fmul float %28, %16 %35 = fadd float %33, %34 %36 = fmul float %25, %17 %37 = fmul float %26, %18 %38 = fadd float %36, %37 %39 = fmul float %27, %19 %40 = fadd float %38, %39 %41 = fmul float %28, %20 %42 = fadd float %40, %41 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %35, float %42, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v4, v2, v1 ; F80008CF 01020400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s4 ; 7E000204 v_cvt_pkrtz_f16_f32_e32 v0, s5, v0 ; 5E000005 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_cvt_pkrtz_f16_f32_e32 v1, s0, v1 ; 5E020200 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1020.0400, 2.1000} IMM[1] FLT32 { 3.1000, 255.0100, 4.0000, 0.1000} IMM[2] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[1].zzzz 2: ADD TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MUL TEMP[1].x, IN[1].zzzz, IMM[1].yyyy 13: MAD TEMP[2].x, TEMP[1].xxxx, IMM[1].zzzz, IMM[1].wwww 14: F2I TEMP[2].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MOV TEMP[2], CONST[ADDR[0].x] 17: MAD TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz, IMM[2].xxxx 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: MOV TEMP[1], CONST[ADDR[0].x] 21: MOV OUT[2], IN[1] 22: MOV OUT[3], TEMP[2] 23: MOV OUT[1], IN[0] 24: MOV OUT[4], TEMP[1] 25: MOV OUT[0], TEMP[0] 26: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float 0x408FE051E0000000, %27 %38 = fadd float 0x4000CCCCC0000000, %37 %39 = fptosi float %38 to i32 %40 = bitcast i32 %39 to float %41 = bitcast float %40 to i32 %42 = shl i32 %41, 4 %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %42) %44 = shl i32 %41, 4 %45 = add i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = shl i32 %41, 4 %48 = add i32 %47, 8 %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %48) %50 = shl i32 %41, 4 %51 = add i32 %50, 12 %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %51) %53 = fmul float %33, %43 %54 = fmul float %34, %46 %55 = fadd float %53, %54 %56 = fmul float %35, %49 %57 = fadd float %55, %56 %58 = fmul float %36, %52 %59 = fadd float %57, %58 %60 = fadd float 0x4008CCCCC0000000, %37 %61 = fptosi float %60 to i32 %62 = bitcast i32 %61 to float %63 = bitcast float %62 to i32 %64 = shl i32 %63, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = shl i32 %63, 4 %67 = add i32 %66, 4 %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %67) %69 = shl i32 %63, 4 %70 = add i32 %69, 8 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = shl i32 %63, 4 %73 = add i32 %72, 12 %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %73) %75 = fmul float %33, %65 %76 = fmul float %34, %68 %77 = fadd float %75, %76 %78 = fmul float %35, %71 %79 = fadd float %77, %78 %80 = fmul float %36, %74 %81 = fadd float %79, %80 %82 = fmul float %27, 0x406FE051E0000000 %83 = fmul float %82, 4.000000e+00 %84 = fadd float %83, 0x3FB99999A0000000 %85 = fptosi float %84 to i32 %86 = bitcast i32 %85 to float %87 = bitcast float %86 to i32 %88 = shl i32 %87, 4 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %87, 4 %91 = add i32 %90, 4 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = shl i32 %87, 4 %94 = add i32 %93, 8 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %87, 4 %97 = add i32 %96, 12 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = fmul float %82, 4.000000e+00 %100 = fadd float %99, 0x3FF19999A0000000 %101 = fptosi float %100 to i32 %102 = bitcast i32 %101 to float %103 = bitcast float %102 to i32 %104 = shl i32 %103, 4 %105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %104) %106 = shl i32 %103, 4 %107 = add i32 %106, 4 %108 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %107) %109 = shl i32 %103, 4 %110 = add i32 %109, 8 %111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %110) %112 = shl i32 %103, 4 %113 = add i32 %112, 12 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %89, float %92, float %95, float %98) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %105, float %108, float %111, float %114) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %81, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 v_mul_f32_e32 v5, 0x437f028f, v3 ; 100A06FF 437F028F v_mov_b32_e32 v6, 0x3dcccccd ; 7E0C02FF 3DCCCCCD v_mad_f32 v6, 4.0, v5, v6 ; D2820006 041A0AF6 v_cvt_i32_f32_e32 v6, v6 ; 7E0C1106 v_lshlrev_b32_e32 v6, 4, v6 ; 340C0C84 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_dword v7, v6, s[0:3], 0 offen ; E0301000 80000706 v_or_b32_e32 v8, 12, v6 ; 38100C8C buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_or_b32_e32 v9, 8, v6 ; 38120C88 buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v6, 4, v6 ; 380C0C84 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 34, 0, 0, 0, v7, v6, v9, v8 ; F800022F 08090607 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v6, 0x3f8ccccd ; 7E0C02FF 3F8CCCCD v_mad_f32 v5, 4.0, v5, v6 ; D2820005 041A0AF6 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 buffer_load_dword v6, v5, s[0:3], 0 offen ; E0301000 80000605 v_or_b32_e32 v7, 12, v5 ; 380E0A8C buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 v_or_b32_e32 v8, 8, v5 ; 38100A88 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_or_b32_e32 v5, 4, v5 ; 380A0A84 buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 35, 0, 0, 0, v6, v5, v8, v7 ; F800023F 07080506 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v5, 0x40466666 ; 7E0A02FF 40466666 v_mov_b32_e32 v6, 0x447f028f ; 7E0C02FF 447F028F v_mad_f32 v5, v6, v3, v5 ; D2820005 04160706 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_or_b32_e32 v7, 4, v5 ; 380E0A84 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v7, v9 ; 10001307 buffer_load_dword v7, v5, s[0:3], 0 offen ; E0301000 80000705 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v7, v0 ; D2820000 04020F08 v_or_b32_e32 v7, 8, v5 ; 380E0A88 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v10, v7, v0 ; D2820000 04020F0A v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v11, v5, v0 ; D2820000 04020B0B v_mov_b32_e32 v5, 0x40066666 ; 7E0A02FF 40066666 v_mad_f32 v1, v6, v3, v5 ; D2820001 04160706 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v2, 4, v1 ; 38040284 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v9 ; 10041302 buffer_load_dword v3, v1, s[0:3], 0 offen ; E0301000 80000301 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v8, v3, v2 ; D2820002 040A0708 v_or_b32_e32 v3, 8, v1 ; 38060288 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v10, v3, v2 ; D2820002 040A070A v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v11, v1, v2 ; D2820001 040A030B v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v1, v0, v3, v2 ; F80008CF 02030001 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %35 = fmul float %22, %31 %36 = fadd float %35, %27 %37 = fmul float %23, %32 %38 = fadd float %37, %28 %39 = fmul float %24, %33 %40 = fadd float %39, %29 %41 = fmul float %25, %34 %42 = fadd float %41, %30 %43 = fmul float %42, %26 %44 = call i32 @llvm.SI.packf16(float %36, float %38) %45 = bitcast i32 %44 to float %46 = call i32 @llvm.SI.packf16(float %40, float %43) %47 = bitcast i32 %46 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %45, float %47, float %45, float %47) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 2, [m0] ; C8080900 v_interp_p2_f32 v2, [v2], v1, 1, 2, [m0] ; C8090901 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_mad_f32 v2, v4, v3, v2 ; D2820002 040A0704 v_interp_p1_f32 v3, v0, 0, 2, [m0] ; C80C0800 v_interp_p2_f32 v3, [v3], v1, 0, 2, [m0] ; C80D0801 v_interp_p1_f32 v4, v0, 0, 3, [m0] ; C8100C00 v_interp_p2_f32 v4, [v4], v1, 0, 3, [m0] ; C8110C01 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 v_mad_f32 v3, v5, v4, v3 ; D2820003 040E0905 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v3, v0, 2, 2, [m0] ; C80C0A00 v_interp_p2_f32 v3, [v3], v1, 2, 2, [m0] ; C80D0A01 v_interp_p1_f32 v4, v0, 2, 3, [m0] ; C8100E00 v_interp_p2_f32 v4, [v4], v1, 2, 3, [m0] ; C8110E01 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_mad_f32 v3, v5, v4, v3 ; D2820003 040E0905 v_interp_p1_f32 v4, v0, 3, 2, [m0] ; C8100B00 v_interp_p2_f32 v4, [v4], v1, 3, 2, [m0] ; C8110B01 v_interp_p1_f32 v5, v0, 3, 3, [m0] ; C8140F00 v_interp_p2_f32 v5, [v5], v1, 3, 3, [m0] ; C8150F01 v_interp_p1_f32 v6, v0, 3, 0, [m0] ; C8180300 v_interp_p2_f32 v6, [v6], v1, 3, 0, [m0] ; C8190301 v_mad_f32 v4, v6, v5, v4 ; D2820004 04120B06 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 v_mul_f32_e32 v0, v5, v4 ; 10000905 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 4.0000, 2.1000} IMM[1] FLT32 { 3.1000, 1.1000, 0.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IN[3].xxxx, IMM[0].zzzz 2: ADD TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: ADD TEMP[2].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ADD TEMP[2].x, IMM[1].yyyy, TEMP[1].xxxx 13: F2I TEMP[2].x, TEMP[2].xxxx 14: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 15: F2I TEMP[1].x, TEMP[1].xxxx 16: UARL ADDR[0].x, TEMP[1].xxxx 17: UARL ADDR[0].x, TEMP[1].xxxx 18: MOV TEMP[1], CONST[ADDR[0].x] 19: UARL ADDR[0].x, TEMP[2].xxxx 20: UARL ADDR[0].x, TEMP[2].xxxx 21: MAD TEMP[1], IN[0], CONST[ADDR[0].x], TEMP[1] 22: MOV TEMP[2].xy, IN[1].xyxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: MOV OUT[2], TEMP[2] 26: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = extractelement <4 x float> %30, i32 1 %33 = extractelement <4 x float> %30, i32 2 %34 = extractelement <4 x float> %30, i32 3 %35 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %36 = load <16 x i8> addrspace(2)* %35, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = fmul float %39, 4.000000e+00 %41 = fadd float 0x4000CCCCC0000000, %40 %42 = fptosi float %41 to i32 %43 = bitcast i32 %42 to float %44 = bitcast float %43 to i32 %45 = shl i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = shl i32 %44, 4 %48 = add i32 %47, 4 %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %48) %50 = shl i32 %44, 4 %51 = add i32 %50, 8 %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %51) %53 = shl i32 %44, 4 %54 = add i32 %53, 12 %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %54) %56 = fmul float %31, %46 %57 = fmul float %32, %49 %58 = fadd float %56, %57 %59 = fmul float %33, %52 %60 = fadd float %58, %59 %61 = fmul float %34, %55 %62 = fadd float %60, %61 %63 = fadd float 0x4008CCCCC0000000, %40 %64 = fptosi float %63 to i32 %65 = bitcast i32 %64 to float %66 = bitcast float %65 to i32 %67 = shl i32 %66, 4 %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %67) %69 = shl i32 %66, 4 %70 = add i32 %69, 4 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = shl i32 %66, 4 %73 = add i32 %72, 8 %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %73) %75 = shl i32 %66, 4 %76 = add i32 %75, 12 %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %76) %78 = fmul float %31, %68 %79 = fmul float %32, %71 %80 = fadd float %78, %79 %81 = fmul float %33, %74 %82 = fadd float %80, %81 %83 = fmul float %34, %77 %84 = fadd float %82, %83 %85 = fadd float 0x3FF19999A0000000, %40 %86 = fptosi float %85 to i32 %87 = bitcast i32 %86 to float %88 = fadd float %40, 0x3FB99999A0000000 %89 = fptosi float %88 to i32 %90 = bitcast i32 %89 to float %91 = bitcast float %90 to i32 %92 = shl i32 %91, 4 %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %92) %94 = shl i32 %91, 4 %95 = add i32 %94, 4 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = shl i32 %91, 4 %98 = add i32 %97, 8 %99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %98) %100 = shl i32 %91, 4 %101 = add i32 %100, 12 %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %101) %103 = bitcast float %87 to i32 %104 = shl i32 %103, 4 %105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %104) %106 = fmul float %17, %105 %107 = fadd float %106, %93 %108 = shl i32 %103, 4 %109 = add i32 %108, 4 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %18, %110 %112 = fadd float %111, %96 %113 = shl i32 %103, 4 %114 = add i32 %113, 8 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %19, %115 %117 = fadd float %116, %99 %118 = shl i32 %103, 4 %119 = add i32 %118, 12 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fmul float %20, %120 %122 = fadd float %121, %102 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %107, float %112, float %117, float %122) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %62, float %84, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 0x3dcccccd ; 7E0A02FF 3DCCCCCD s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, 4.0, v1, v5 ; D2820005 041602F6 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v6, v5, s[0:3], 0 offen ; E0301000 80000605 v_mov_b32_e32 v7, 0x3f8ccccd ; 7E0E02FF 3F8CCCCD v_mad_f32 v7, 4.0, v1, v7 ; D2820007 041E02F6 v_cvt_i32_f32_e32 v7, v7 ; 7E0E1107 v_lshlrev_b32_e32 v7, 4, v7 ; 340E0E84 buffer_load_dword v8, v7, s[0:3], 0 offen ; E0301000 80000807 buffer_load_format_xyzw v[9:12], v0, s[4:7], 0 idxen ; E00C2000 80010900 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v9, v8, v6 ; D2820006 041A1109 v_or_b32_e32 v8, 12, v5 ; 38100A8C buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_or_b32_e32 v13, 12, v7 ; 381A0E8C buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v12, v13, v8 ; D2820008 04221B0C v_or_b32_e32 v13, 8, v5 ; 381A0A88 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D v_or_b32_e32 v14, 8, v7 ; 381C0E88 buffer_load_dword v14, v14, s[0:3], 0 offen ; E0301000 80000E0E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v13, v11, v14, v13 ; D282000D 04361D0B v_or_b32_e32 v5, 4, v5 ; 380A0A84 buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 v_or_b32_e32 v7, 4, v7 ; 380E0E84 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v10, v7, v5 ; D2820005 04160F0A exp 15, 32, 0, 0, 0, v6, v5, v13, v8 ; F800020F 080D0506 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 v_mov_b32_e32 v9, 0 ; 7E120280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v5, v6, v9, v9 ; F800021F 09090605 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v5, 0x40466666 ; 7E0A02FF 40466666 v_mad_f32 v5, 4.0, v1, v5 ; D2820005 041602F6 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_or_b32_e32 v6, 4, v5 ; 380C0A84 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v11 ; 10001706 buffer_load_dword v6, v5, s[0:3], 0 offen ; E0301000 80000605 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v10, v6, v0 ; D2820000 04020D0A v_or_b32_e32 v6, 8, v5 ; 380C0A88 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v12, v6, v0 ; D2820000 04020D0C v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v13, v5, v0 ; D2820000 04020B0D v_mov_b32_e32 v5, 0x40066666 ; 7E0A02FF 40066666 v_mad_f32 v1, 4.0, v1, v5 ; D2820001 041602F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v2, 4, v1 ; 38040284 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v11 ; 10041702 buffer_load_dword v3, v1, s[0:3], 0 offen ; E0301000 80000301 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v10, v3, v2 ; D2820002 040A070A v_or_b32_e32 v3, 8, v1 ; 38060288 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v12, v3, v2 ; D2820002 040A070C v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v13, v1, v2 ; D2820001 040A030D v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v1, v0, v9, v2 ; F80008CF 02090001 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 3 %40 = fmul float %29, %39 %41 = call i32 @llvm.SI.packf16(float %26, float %27) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %28, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v3, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800800 00010303 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_cvt_pkrtz_f16_f32_e32 v0, v4, v3 ; 5E000704 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MAD TEMP[1], IN[0], CONST[1], CONST[0] 5: MOV TEMP[2].xy, IN[1].xyxx 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: MOV OUT[2], TEMP[2] 9: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = fmul float %47, %21 %52 = fmul float %48, %22 %53 = fadd float %51, %52 %54 = fmul float %49, %23 %55 = fadd float %53, %54 %56 = fmul float %50, %24 %57 = fadd float %55, %56 %58 = fmul float %47, %25 %59 = fmul float %48, %26 %60 = fadd float %58, %59 %61 = fmul float %49, %27 %62 = fadd float %60, %61 %63 = fmul float %50, %28 %64 = fadd float %62, %63 %65 = fmul float %33, %17 %66 = fadd float %65, %13 %67 = fmul float %34, %18 %68 = fadd float %67, %14 %69 = fmul float %35, %19 %70 = fadd float %69, %15 %71 = fmul float %36, %20 %72 = fadd float %71, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %66, float %68, float %70, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %57, float %64, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v5, s5, v4, v5 ; D2820005 04160805 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v6, s5, v3, v6 ; D2820006 041A0605 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v7, s5, v2, v7 ; D2820007 041E0405 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s4 ; 7E100204 v_mad_f32 v1, s5, v1, v8 ; D2820001 04220205 exp 15, 32, 0, 0, 0, v1, v7, v6, v5 ; F800020F 05060701 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v5, v5 ; F800021F 05050201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v6 ; D2820000 04180103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v5, v1 ; F80008CF 01050400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 3 %40 = fmul float %29, %39 %41 = call i32 @llvm.SI.packf16(float %26, float %27) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %28, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v3, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800800 00010303 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_cvt_pkrtz_f16_f32_e32 v0, v4, v3 ; 5E000704 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..143] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1530.0599, 2.1000} IMM[1] FLT32 { 3.1000, 4.1000, 5.1000, 255.0100} IMM[2] FLT32 { 6.0000, 0.1000, 1.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[0].zzzz 2: ADD TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: ADD TEMP[2].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ADD TEMP[2].x, IMM[1].yyyy, TEMP[1].xxxx 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 17: ADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 21: MOV TEMP[2].y, TEMP[1].xxxx 22: MUL TEMP[1].x, IN[0].zzzz, IMM[1].wwww 23: MAD TEMP[3].x, TEMP[1].xxxx, IMM[2].xxxx, IMM[2].yyyy 24: F2I TEMP[3].x, TEMP[3].xxxx 25: UARL ADDR[0].x, TEMP[3].xxxx 26: MOV TEMP[3], CONST[ADDR[0].x] 27: MAD TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx, IMM[2].zzzz 28: F2I TEMP[1].x, TEMP[1].xxxx 29: UARL ADDR[0].x, TEMP[1].xxxx 30: MOV TEMP[1], CONST[ADDR[0].x] 31: MOV TEMP[2].xy, TEMP[2].xyxx 32: MOV OUT[1], IN[0] 33: MOV OUT[2], TEMP[3] 34: MOV OUT[3], TEMP[1] 35: MOV OUT[0], TEMP[0] 36: MOV OUT[4], TEMP[2] 37: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = fmul float 0x4097E83D60000000, %19 %30 = fadd float 0x4000CCCCC0000000, %29 %31 = fptosi float %30 to i32 %32 = bitcast i32 %31 to float %33 = bitcast float %32 to i32 %34 = shl i32 %33, 4 %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %34) %36 = shl i32 %33, 4 %37 = add i32 %36, 4 %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %37) %39 = shl i32 %33, 4 %40 = add i32 %39, 8 %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %40) %42 = shl i32 %33, 4 %43 = add i32 %42, 12 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = fmul float %25, %35 %46 = fmul float %26, %38 %47 = fadd float %45, %46 %48 = fmul float %27, %41 %49 = fadd float %47, %48 %50 = fmul float %28, %44 %51 = fadd float %49, %50 %52 = fadd float 0x4008CCCCC0000000, %29 %53 = fptosi float %52 to i32 %54 = bitcast i32 %53 to float %55 = bitcast float %54 to i32 %56 = shl i32 %55, 4 %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %56) %58 = shl i32 %55, 4 %59 = add i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = shl i32 %55, 4 %62 = add i32 %61, 8 %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %62) %64 = shl i32 %55, 4 %65 = add i32 %64, 12 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = fmul float %25, %57 %68 = fmul float %26, %60 %69 = fadd float %67, %68 %70 = fmul float %27, %63 %71 = fadd float %69, %70 %72 = fmul float %28, %66 %73 = fadd float %71, %72 %74 = fadd float 0x4010666660000000, %29 %75 = fptosi float %74 to i32 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = shl i32 %77, 4 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = shl i32 %77, 4 %81 = add i32 %80, 4 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %77, 4 %84 = add i32 %83, 8 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = shl i32 %77, 4 %87 = add i32 %86, 12 %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %87) %89 = fmul float %25, %79 %90 = fmul float %26, %82 %91 = fadd float %89, %90 %92 = fmul float %27, %85 %93 = fadd float %91, %92 %94 = fmul float %28, %88 %95 = fadd float %93, %94 %96 = fadd float 0x4014666660000000, %29 %97 = fptosi float %96 to i32 %98 = bitcast i32 %97 to float %99 = bitcast float %98 to i32 %100 = shl i32 %99, 4 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = shl i32 %99, 4 %103 = add i32 %102, 4 %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %103) %105 = shl i32 %99, 4 %106 = add i32 %105, 8 %107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %106) %108 = shl i32 %99, 4 %109 = add i32 %108, 12 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %25, %101 %112 = fmul float %26, %104 %113 = fadd float %111, %112 %114 = fmul float %27, %107 %115 = fadd float %113, %114 %116 = fmul float %28, %110 %117 = fadd float %115, %116 %118 = fmul float %19, 0x406FE051E0000000 %119 = fmul float %118, 6.000000e+00 %120 = fadd float %119, 0x3FB99999A0000000 %121 = fptosi float %120 to i32 %122 = bitcast i32 %121 to float %123 = bitcast float %122 to i32 %124 = shl i32 %123, 4 %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %124) %126 = shl i32 %123, 4 %127 = add i32 %126, 4 %128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %127) %129 = shl i32 %123, 4 %130 = add i32 %129, 8 %131 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %130) %132 = shl i32 %123, 4 %133 = add i32 %132, 12 %134 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %133) %135 = fmul float %118, 6.000000e+00 %136 = fadd float %135, 0x3FF19999A0000000 %137 = fptosi float %136 to i32 %138 = bitcast i32 %137 to float %139 = bitcast float %138 to i32 %140 = shl i32 %139, 4 %141 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %140) %142 = shl i32 %139, 4 %143 = add i32 %142, 4 %144 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %143) %145 = shl i32 %139, 4 %146 = add i32 %145, 8 %147 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %146) %148 = shl i32 %139, 4 %149 = add i32 %148, 12 %150 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %149) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %125, float %128, float %131, float %134) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %141, float %144, float %147, float %150) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %117, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %73, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 v_mul_f32_e32 v5, 0x437f028f, v3 ; 100A06FF 437F028F v_mov_b32_e32 v6, 0x3dcccccd ; 7E0C02FF 3DCCCCCD v_mov_b32_e32 v7, 0x40c00000 ; 7E0E02FF 40C00000 v_mad_f32 v6, v7, v5, v6 ; D2820006 041A0B07 v_cvt_i32_f32_e32 v6, v6 ; 7E0C1106 v_lshlrev_b32_e32 v6, 4, v6 ; 340C0C84 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_dword v8, v6, s[0:3], 0 offen ; E0301000 80000806 v_or_b32_e32 v9, 12, v6 ; 38120C8C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v10, 8, v6 ; 38140C88 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A v_or_b32_e32 v6, 4, v6 ; 380C0C84 buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v8, v6, v10, v9 ; F800021F 090A0608 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v6, 0x3f8ccccd ; 7E0C02FF 3F8CCCCD v_mad_f32 v5, v7, v5, v6 ; D2820005 041A0B07 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 buffer_load_dword v6, v5, s[0:3], 0 offen ; E0301000 80000605 v_or_b32_e32 v7, 12, v5 ; 380E0A8C buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 v_or_b32_e32 v8, 8, v5 ; 38100A88 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_or_b32_e32 v5, 4, v5 ; 380A0A84 buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 34, 0, 0, 0, v6, v5, v8, v7 ; F800022F 07080506 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v5, 0x40a33333 ; 7E0A02FF 40A33333 v_mov_b32_e32 v6, 0x44bf41eb ; 7E0C02FF 44BF41EB v_mad_f32 v5, v6, v3, v5 ; D2820005 04160706 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_or_b32_e32 v7, 4, v5 ; 380E0A84 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v7, v9 ; 10001307 buffer_load_dword v7, v5, s[0:3], 0 offen ; E0301000 80000705 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v7, v0 ; D2820000 04020F08 v_or_b32_e32 v7, 8, v5 ; 380E0A88 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v10, v7, v0 ; D2820000 04020F0A v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v11, v5, v0 ; D2820000 04020B0B v_mov_b32_e32 v5, 0x40833333 ; 7E0A02FF 40833333 v_mad_f32 v5, v6, v3, v5 ; D2820005 04160706 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_or_b32_e32 v7, 4, v5 ; 380E0A84 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, v7, v9 ; 100E1307 buffer_load_dword v12, v5, s[0:3], 0 offen ; E0301000 80000C05 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v8, v12, v7 ; D2820007 041E1908 v_or_b32_e32 v12, 8, v5 ; 38180A88 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v10, v12, v7 ; D2820007 041E190A v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v11, v5, v7 ; D2820005 041E0B0B v_mov_b32_e32 v7, 0 ; 7E0E0280 exp 15, 35, 0, 0, 0, v5, v0, v7, v7 ; F800023F 07070005 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0x40466666 ; 7E0002FF 40466666 v_mad_f32 v0, v6, v3, v0 ; D2820000 04020706 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_or_b32_e32 v5, 4, v0 ; 380A0084 buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, v5, v9 ; 100A1305 buffer_load_dword v12, v0, s[0:3], 0 offen ; E0301000 80000C00 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v8, v12, v5 ; D2820005 04161908 v_or_b32_e32 v12, 8, v0 ; 38180088 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v10, v12, v5 ; D2820005 0416190A v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v11, v0, v5 ; D2820000 0416010B v_mov_b32_e32 v5, 0x40066666 ; 7E0A02FF 40066666 v_mad_f32 v1, v6, v3, v5 ; D2820001 04160706 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v2, 4, v1 ; 38040284 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v9 ; 10041302 buffer_load_dword v3, v1, s[0:3], 0 offen ; E0301000 80000301 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v8, v3, v2 ; D2820002 040A0708 v_or_b32_e32 v3, 8, v1 ; 38060288 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v10, v3, v2 ; D2820002 040A070A v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v11, v1, v2 ; D2820001 040A030B v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v1, v0, v7, v2 ; F80008CF 02070001 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = bitcast <8 x i32> %23 to <32 x i8> %42 = bitcast <4 x i32> %25 to <16 x i8> %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %41, <16 x i8> %42, i32 2) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = fmul float %44, %31 %49 = fadd float %48, %27 %50 = fmul float %45, %32 %51 = fadd float %50, %28 %52 = fmul float %46, %33 %53 = fadd float %52, %29 %54 = fmul float %47, %34 %55 = fadd float %54, %30 %56 = fmul float %55, %26 %57 = call i32 @llvm.SI.packf16(float %49, float %51) %58 = bitcast i32 %57 to float %59 = call i32 @llvm.SI.packf16(float %53, float %56) %60 = bitcast i32 %59 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %58, float %60, float %58, float %60) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v3, v7, v6 ; D2820006 041A0F03 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_mad_f32 v7, v2, v8, v7 ; D2820007 041E1102 v_cvt_pkrtz_f16_f32_e32 v6, v7, v6 ; 5E0C0D07 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_mad_f32 v7, v4, v8, v7 ; D2820007 041E1104 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 3, 2, [m0] ; C8240B00 v_interp_p2_f32 v9, [v9], v1, 3, 2, [m0] ; C8250B01 v_mad_f32 v2, v5, v9, v8 ; D2820002 04221305 v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300 v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301 v_mul_f32_e32 v0, v3, v2 ; 10000503 v_cvt_pkrtz_f16_f32_e32 v0, v7, v0 ; 5E000107 exp 15, 0, 1, 1, 1, v6, v0, v6, v0 ; F8001C0F 00060006 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[2] 2: DP4 TEMP[1].x, IN[1], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[4] 5: DP4 TEMP[2].x, IN[1], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[2], CONST[0] 10: MOV OUT[3], CONST[1] 11: MOV OUT[0], TEMP[0] 12: MOV OUT[4], TEMP[1] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %21 %54 = fmul float %50, %22 %55 = fadd float %53, %54 %56 = fmul float %51, %23 %57 = fadd float %55, %56 %58 = fmul float %52, %24 %59 = fadd float %57, %58 %60 = fmul float %49, %25 %61 = fmul float %50, %26 %62 = fadd float %60, %61 %63 = fmul float %51, %27 %64 = fadd float %62, %63 %65 = fmul float %52, %28 %66 = fadd float %64, %65 %67 = fmul float %49, %29 %68 = fmul float %50, %30 %69 = fadd float %67, %68 %70 = fmul float %51, %31 %71 = fadd float %69, %70 %72 = fmul float %52, %32 %73 = fadd float %71, %72 %74 = fmul float %49, %33 %75 = fmul float %50, %34 %76 = fadd float %74, %75 %77 = fmul float %51, %35 %78 = fadd float %76, %77 %79 = fmul float %52, %36 %80 = fadd float %78, %79 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %73, float %80, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 35, 0, 0, 0, v5, v4, v6, v6 ; F800023F 06060405 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = bitcast <8 x i32> %23 to <32 x i8> %42 = bitcast <4 x i32> %25 to <16 x i8> %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %41, <16 x i8> %42, i32 2) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = fmul float %44, %31 %49 = fadd float %48, %27 %50 = fmul float %45, %32 %51 = fadd float %50, %28 %52 = fmul float %46, %33 %53 = fadd float %52, %29 %54 = fmul float %47, %34 %55 = fadd float %54, %30 %56 = fmul float %55, %26 %57 = call i32 @llvm.SI.packf16(float %49, float %51) %58 = bitcast i32 %57 to float %59 = call i32 @llvm.SI.packf16(float %53, float %56) %60 = bitcast i32 %59 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %58, float %60, float %58, float %60) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v3, v7, v6 ; D2820006 041A0F03 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_mad_f32 v7, v2, v8, v7 ; D2820007 041E1102 v_cvt_pkrtz_f16_f32_e32 v6, v7, v6 ; 5E0C0D07 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_mad_f32 v7, v4, v8, v7 ; D2820007 041E1104 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 3, 2, [m0] ; C8240B00 v_interp_p2_f32 v9, [v9], v1, 3, 2, [m0] ; C8250B01 v_mad_f32 v2, v5, v9, v8 ; D2820002 04221305 v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300 v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301 v_mul_f32_e32 v0, v3, v2 ; 10000503 v_cvt_pkrtz_f16_f32_e32 v0, v7, v0 ; 5E000107 exp 15, 0, 1, 1, 1, v6, v0, v6, v0 ; F8001C0F 00060006 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 34, 0, 0, 0, v5, v4, v6, v6 ; F800022F 06060405 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..2] DCL CONST[4] DCL CONST[6] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, -CONST[0].xxxx 5: BGNLOOP :0 6: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: BRK 9: ENDIF 10: MOV TEMP[3].y, -CONST[0].yyyy 11: BGNLOOP :0 12: FSLT TEMP[5].x, CONST[0].yyyy, TEMP[3].yyyy 13: UIF TEMP[5].xxxx :0 14: BRK 15: ENDIF 16: ADD TEMP[6].xy, CONST[1].xyyy, TEMP[3].xyyy 17: MAD TEMP[7].xy, TEMP[6].xyyy, CONST[6].xyyy, TEMP[0].xyyy 18: MOV TEMP[8].xy, TEMP[7].xyyy 19: MOV TEMP[8].w, IMM[0].xxxx 20: TXB TEMP[9], TEMP[8], SAMP[1], 2D 21: ADD TEMP[2], TEMP[2], TEMP[9] 22: ADD TEMP[10].x, TEMP[3].yyyy, IMM[0].yyyy 23: MOV TEMP[3].y, TEMP[10].xxxx 24: ENDLOOP :0 25: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 26: ENDLOOP :0 27: MUL TEMP[1].w, TEMP[2], CONST[0].wwww 28: MUL TEMP[0].xy, IN[2].xyyy, CONST[4].xyyy 29: MOV TEMP[0].xy, TEMP[0].xyyy 30: MOV TEMP[0].w, IMM[0].xxxx 31: TXB TEMP[0], TEMP[0], SAMP[0], 2D 32: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[0].wwww 33: MUL TEMP[2].x, TEMP[1].wwww, TEMP[2].xxxx 34: MUL TEMP[2].x, TEMP[2].xxxx, CONST[0].zzzz 35: MOV_SAT TEMP[2].x, TEMP[2].xxxx 36: MAD TEMP[1], CONST[2], TEMP[2].xxxx, TEMP[0] 37: MOV TEMP[0].w, IMM[0].yyyy 38: MOV TEMP[0].xyz, IN[1].xyzx 39: MUL TEMP[0], TEMP[1], TEMP[0] 40: MUL TEMP[1], TEMP[0], IN[1].wwww 41: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 42: MOV OUT[0], TEMP[1] 43: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %56 = fsub float -0.000000e+00, %24 %57 = fsub float -0.000000e+00, %25 %58 = bitcast <8 x i32> %43 to <32 x i8> %59 = bitcast <4 x i32> %45 to <16 x i8> br label %LOOP LOOP: ; preds = %IF47, %main_body %temp12.0 = phi float [ %56, %main_body ], [ %125, %IF47 ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.1.lcssa, %IF47 ] %60 = fcmp olt float %24, %temp12.0 %61 = sext i1 %60 to i32 %62 = bitcast i32 %61 to float %63 = bitcast float %62 to i32 %64 = icmp ne i32 %63, 0 br i1 %64, label %IF, label %ENDIF IF: ; preds = %LOOP %temp11.0.lcssa = phi float [ %temp11.0, %LOOP ] %65 = fmul float %temp11.0.lcssa, %27 %66 = fmul float %54, %34 %67 = fmul float %55, %35 %68 = bitcast float 0.000000e+00 to i32 %69 = bitcast float %66 to i32 %70 = bitcast float %67 to i32 %71 = insertelement <4 x i32> undef, i32 %68, i32 0 %72 = insertelement <4 x i32> %71, i32 %69, i32 1 %73 = insertelement <4 x i32> %72, i32 %70, i32 2 %74 = insertelement <4 x i32> %73, i32 undef, i32 3 %75 = bitcast <8 x i32> %39 to <32 x i8> %76 = bitcast <4 x i32> %41 to <16 x i8> %77 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = fsub float -0.000000e+00, %81 %83 = fadd float 1.000000e+00, %82 %84 = fmul float %65, %83 %85 = fmul float %84, %26 %86 = call float @llvm.AMDIL.clamp.(float %85, float 0.000000e+00, float 1.000000e+00) %87 = fmul float %30, %86 %88 = fadd float %87, %78 %89 = fmul float %31, %86 %90 = fadd float %89, %79 %91 = fmul float %32, %86 %92 = fadd float %91, %80 %93 = fmul float %33, %86 %94 = fadd float %93, %81 %95 = fmul float %88, %50 %96 = fmul float %90, %51 %97 = fmul float %92, %52 %98 = fmul float %94, 1.000000e+00 %99 = fmul float %95, %53 %100 = fmul float %96, %53 %101 = fmul float %97, %53 %102 = fmul float %98, %53 %103 = fmul float %46, %102 %104 = fadd float %103, %99 %105 = fmul float %47, %102 %106 = fadd float %105, %100 %107 = fmul float %48, %102 %108 = fadd float %107, %101 %109 = fmul float %49, %102 %110 = fadd float %109, %102 %111 = call i32 @llvm.SI.packf16(float %104, float %106) %112 = bitcast i32 %111 to float %113 = call i32 @llvm.SI.packf16(float %108, float %110) %114 = bitcast i32 %113 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %112, float %114, float %112, float %114) ret void ENDIF: ; preds = %LOOP %115 = fadd float %28, %temp12.0 %116 = fmul float %115, %36 %117 = fadd float %116, %54 %118 = bitcast float %117 to i32 %119 = insertelement <4 x i32> , i32 %118, i32 1 br label %LOOP45 LOOP45: ; preds = %ENDIF46, %ENDIF %temp13.0 = phi float [ %57, %ENDIF ], [ %135, %ENDIF46 ] %temp11.1 = phi float [ %temp11.0, %ENDIF ], [ %134, %ENDIF46 ] %120 = fcmp olt float %25, %temp13.0 %121 = sext i1 %120 to i32 %122 = bitcast i32 %121 to float %123 = bitcast float %122 to i32 %124 = icmp ne i32 %123, 0 br i1 %124, label %IF47, label %ENDIF46 IF47: ; preds = %LOOP45 %temp11.1.lcssa = phi float [ %temp11.1, %LOOP45 ] %125 = fadd float %temp12.0, 1.000000e+00 br label %LOOP ENDIF46: ; preds = %LOOP45 %126 = fadd float %29, %temp13.0 %127 = fmul float %126, %37 %128 = fadd float %127, %55 %129 = bitcast float %128 to i32 %130 = insertelement <4 x i32> %119, i32 %129, i32 2 %131 = insertelement <4 x i32> %130, i32 undef, i32 3 %132 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %131, <32 x i8> %58, <16 x i8> %59, i32 2) %133 = extractelement <4 x float> %132, i32 3 %134 = fadd float %temp11.1, %133 %135 = fadd float %temp13.0, 1.000000e+00 br label %LOOP45 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700 v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701 v_interp_p1_f32 v3, v0, 2, 1, [m0] ; C80C0600 v_interp_p2_f32 v3, [v3], v1, 2, 1, [m0] ; C80D0601 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v9, v0, 2, 0, [m0] ; C8240200 v_interp_p2_f32 v9, [v9], v1, 2, 0, [m0] ; C8250201 v_interp_p1_f32 v11, v0, 1, 0, [m0] ; C82C0100 v_interp_p2_f32 v11, [v11], v1, 1, 0, [m0] ; C82D0101 v_interp_p1_f32 v10, v0, 0, 0, [m0] ; C8280000 v_interp_p2_f32 v10, [v10], v1, 0, 0, [m0] ; C8290001 s_load_dwordx4 s[40:43], s[2:3], 0x0 ; C0940300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[40:43], 0x1 ; C20A2901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v0, s20, v1 ; 3A000214 s_buffer_load_dword s21, s[40:43], 0x0 ; C20AA900 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v12, s21, v1 ; 3A180215 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_buffer_load_dword s22, s[40:43], 0x19 ; C20B2919 s_buffer_load_dword s23, s[40:43], 0x18 ; C20BA918 s_buffer_load_dword s6, s[40:43], 0x11 ; C2032911 s_buffer_load_dword s7, s[40:43], 0x10 ; C203A910 s_buffer_load_dword s3, s[40:43], 0xb ; C201A90B s_buffer_load_dword s0, s[40:43], 0xa ; C200290A s_buffer_load_dword s2, s[40:43], 0x9 ; C2012909 s_buffer_load_dword s1, s[40:43], 0x8 ; C200A908 s_buffer_load_dword s38, s[40:43], 0x5 ; C2132905 s_buffer_load_dword s39, s[40:43], 0x4 ; C213A904 s_buffer_load_dword s5, s[40:43], 0x3 ; C202A903 s_buffer_load_dword s4, s[40:43], 0x2 ; C2022902 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_mov_b64 s[40:41], 0 ; BEA80480 s_mov_b64 s[36:37], s[40:41] ; BEA40428 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, v13 ; 7E02030D v_cmp_lt_f32_e32 vcc, s21, v12 ; 7C021815 v_cndmask_b32_e64 v13, 0, -1, vcc ; D200080D 01A98280 v_cmp_eq_i32_e64 s[42:43], v13, 0 ; D104002A 0001010D s_and_saveexec_b64 s[42:43], s[42:43] ; BEAA242A s_xor_b64 s[42:43], exec, s[42:43] ; 89AA2A7E s_cbranch_execz BB0_4 ; BF880000 v_add_f32_e32 v13, s39, v12 ; 061A1827 v_mad_f32 v15, v13, s23, v8 ; D282000F 04202F0D v_mov_b32_e32 v14, 0 ; 7E1C0280 s_mov_b64 s[44:45], s[40:41] ; BEAC0428 v_mov_b32_e32 v18, v0 ; 7E240300 v_mov_b32_e32 v19, v1 ; 7E260301 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v13, v19 ; 7E1A0313 v_cmp_lt_f32_e32 vcc, s20, v18 ; 7C022414 v_cndmask_b32_e64 v19, 0, -1, vcc ; D2000813 01A98280 v_cmp_eq_i32_e64 s[46:47], v19, 0 ; D104002E 00010113 s_and_saveexec_b64 s[46:47], s[46:47] ; BEAE242E s_xor_b64 s[46:47], exec, s[46:47] ; 89AE2E7E v_add_f32_e32 v20, 1.0, v18 ; 062824F2 v_add_f32_e32 v18, s38, v18 ; 06242426 v_mad_f32 v16, v18, s22, v5 ; D2820010 04142D12 image_sample_b v18, 8, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[24:31], s[32:35] ; F0940800 0106120E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v19, v18, v13 ; 06261B12 v_mov_b32_e32 v18, v20 ; 7E240314 s_or_b64 exec, exec, s[46:47] ; 88FE2E7E s_or_b64 s[44:45], s[46:47], s[44:45] ; 88AC2C2E s_andn2_b64 exec, exec, s[44:45] ; 8AFE2C7E s_cbranch_execnz BB0_5 ; BF890000 s_or_b64 exec, exec, s[44:45] ; 88FE2C7E v_add_f32_e32 v12, 1.0, v12 ; 061818F2 s_or_b64 exec, exec, s[42:43] ; 88FE2A7E s_or_b64 s[36:37], s[42:43], s[36:37] ; 88A4242A s_andn2_b64 exec, exec, s[36:37] ; 8AFE247E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[36:37] ; 88FE247E v_mul_f32_e32 v14, s6, v5 ; 101C0A06 v_mul_f32_e32 v13, s7, v8 ; 101A1007 v_mov_b32_e32 v12, 0 ; 7E180280 image_sample_b v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[8:15], s[16:19] ; F0940F00 00820C0C s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v0, 1.0, v15 ; 08001EF2 v_mul_f32_e32 v1, s5, v1 ; 10020205 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mul_f32_e32 v0, s4, v0 ; 10000004 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mad_f32 v1, s3, v0, v15 ; D2820001 043E0003 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mad_f32 v5, s2, v0, v13 ; D2820005 04360002 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 v_mad_f32 v5, v11, v1, v5 ; D2820005 0416030B v_mad_f32 v7, s1, v0, v12 ; D2820007 04320001 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v10, v1, v6 ; D2820006 041A030A v_cvt_pkrtz_f16_f32_e32 v5, v6, v5 ; 5E0A0B06 v_mad_f32 v0, s0, v0, v14 ; D2820000 043A0000 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mad_f32 v0, v9, v1, v0 ; D2820000 04020309 v_mad_f32 v1, v4, v1, v1 ; D2820001 04060304 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v5, v0, v5, v0 ; F8001C0F 00050005 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 34, 0, 0, 0, v5, v4, v6, v6 ; F800022F 06060405 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL CONST[2] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].x, -CONST[0].xxxx 4: BGNLOOP :0 5: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 6: UIF TEMP[4].xxxx :0 7: BRK 8: ENDIF 9: MAD TEMP[5].xy, TEMP[3].xxxx, CONST[2].xyyy, TEMP[0].xyyy 10: MOV TEMP[6].xy, TEMP[5].xyyy 11: MOV TEMP[6].w, IMM[0].xxxx 12: TXB TEMP[7], TEMP[6], SAMP[0], 2D 13: ADD TEMP[2], TEMP[2], TEMP[7] 14: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 15: ENDLOOP :0 16: MUL TEMP[1], TEMP[2], CONST[0].wwww 17: MOV TEMP[0].w, IMM[0].yyyy 18: MOV TEMP[0].xyz, IN[1].xyzx 19: MUL TEMP[0], TEMP[1], TEMP[0] 20: MUL TEMP[1], TEMP[0], IN[1].wwww 21: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %29 = load <8 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %31 = load <4 x i32> addrspace(2)* %30, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %42 = fsub float -0.000000e+00, %24 %43 = bitcast <8 x i32> %29 to <32 x i8> %44 = bitcast <4 x i32> %31 to <16 x i8> br label %LOOP LOOP: ; preds = %ENDIF, %main_body %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %88, %ENDIF ] %temp9.0 = phi float [ 0.000000e+00, %main_body ], [ %89, %ENDIF ] %temp10.0 = phi float [ 0.000000e+00, %main_body ], [ %90, %ENDIF ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %91, %ENDIF ] %temp12.0 = phi float [ %42, %main_body ], [ %92, %ENDIF ] %45 = fcmp olt float %24, %temp12.0 %46 = sext i1 %45 to i32 %47 = bitcast i32 %46 to float %48 = bitcast float %47 to i32 %49 = icmp ne i32 %48, 0 br i1 %49, label %IF, label %ENDIF IF: ; preds = %LOOP %temp11.0.lcssa = phi float [ %temp11.0, %LOOP ] %temp10.0.lcssa = phi float [ %temp10.0, %LOOP ] %temp9.0.lcssa = phi float [ %temp9.0, %LOOP ] %temp8.0.lcssa = phi float [ %temp8.0, %LOOP ] %50 = fmul float %temp8.0.lcssa, %25 %51 = fmul float %temp9.0.lcssa, %25 %52 = fmul float %temp10.0.lcssa, %25 %53 = fmul float %temp11.0.lcssa, %25 %54 = fmul float %50, %36 %55 = fmul float %51, %37 %56 = fmul float %52, %38 %57 = fmul float %53, 1.000000e+00 %58 = fmul float %54, %39 %59 = fmul float %55, %39 %60 = fmul float %56, %39 %61 = fmul float %57, %39 %62 = fmul float %32, %61 %63 = fadd float %62, %58 %64 = fmul float %33, %61 %65 = fadd float %64, %59 %66 = fmul float %34, %61 %67 = fadd float %66, %60 %68 = fmul float %35, %61 %69 = fadd float %68, %61 %70 = call i32 @llvm.SI.packf16(float %63, float %65) %71 = bitcast i32 %70 to float %72 = call i32 @llvm.SI.packf16(float %67, float %69) %73 = bitcast i32 %72 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %71, float %73, float %71, float %73) ret void ENDIF: ; preds = %LOOP %74 = fmul float %temp12.0, %26 %75 = fadd float %74, %40 %76 = fmul float %temp12.0, %27 %77 = fadd float %76, %41 %78 = bitcast float %75 to i32 %79 = bitcast float %77 to i32 %80 = insertelement <4 x i32> , i32 %78, i32 1 %81 = insertelement <4 x i32> %80, i32 %79, i32 2 %82 = insertelement <4 x i32> %81, i32 undef, i32 3 %83 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %82, <32 x i8> %43, <16 x i8> %44, i32 2) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = fadd float %temp8.0, %84 %89 = fadd float %temp9.0, %85 %90 = fadd float %temp10.0, %86 %91 = fadd float %temp11.0, %87 %92 = fadd float %temp12.0, 1.000000e+00 br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 1, 2, [m0] ; C8100900 v_interp_p2_f32 v4, [v4], v1, 1, 2, [m0] ; C8110901 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700 v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701 v_interp_p1_f32 v3, v0, 2, 1, [m0] ; C80C0600 v_interp_p2_f32 v3, [v3], v1, 2, 1, [m0] ; C80D0601 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v6, v0, 3, 0, [m0] ; C8180300 v_interp_p2_f32 v6, [v6], v1, 3, 0, [m0] ; C8190301 v_interp_p1_f32 v9, v0, 2, 0, [m0] ; C8240200 v_interp_p2_f32 v9, [v9], v1, 2, 0, [m0] ; C8250201 v_interp_p1_f32 v10, v0, 1, 0, [m0] ; C8280100 v_interp_p2_f32 v10, [v10], v1, 1, 0, [m0] ; C8290101 v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[16:19], 0x0 ; C2009100 v_mov_b32_e32 v0, 0x80000000 ; 7E0002FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v1, s1, v0 ; 3A020001 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_buffer_load_dword s2, s[16:19], 0x9 ; C2011109 s_buffer_load_dword s3, s[16:19], 0x8 ; C2019108 s_buffer_load_dword s0, s[16:19], 0x3 ; C2001103 v_mov_b32_e32 v15, 0 ; 7E1E0280 s_mov_b64 s[16:17], 0 ; BE900480 v_mov_b32_e32 v18, v15 ; 7E24030F v_mov_b32_e32 v17, v15 ; 7E22030F v_mov_b32_e32 v16, v15 ; 7E20030F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v14, v16 ; 7E1C0310 v_mov_b32_e32 v0, v17 ; 7E000311 v_mov_b32_e32 v13, v18 ; 7E1A0312 v_mov_b32_e32 v12, v15 ; 7E18030F v_cmp_lt_f32_e32 vcc, s1, v1 ; 7C020201 v_cndmask_b32_e64 v15, 0, -1, vcc ; D200080F 01A98280 v_cmp_eq_i32_e64 s[18:19], v15, 0 ; D1040012 0001010F s_and_saveexec_b64 s[18:19], s[18:19] ; BE922412 s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mad_f32 v17, v1, s2, v4 ; D2820011 04100501 v_mad_f32 v16, v1, s3, v5 ; D2820010 04140701 v_mov_b32_e32 v15, 0 ; 7E1E0280 image_sample_b v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[8:15], s[4:7] ; F0940F00 0022130F s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_add_f32_e32 v16, v22, v14 ; 06201D16 v_add_f32_e32 v17, v21, v0 ; 06220115 v_add_f32_e32 v18, v20, v13 ; 06241B14 v_add_f32_e32 v15, v19, v12 ; 061E1913 v_add_f32_e32 v1, 1.0, v1 ; 060202F2 s_or_b64 exec, exec, s[18:19] ; 88FE127E s_or_b64 s[16:17], s[18:19], s[16:17] ; 88901012 s_andn2_b64 exec, exec, s[16:17] ; 8AFE107E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[16:17] ; 88FE107E v_mul_f32_e32 v1, s0, v14 ; 10021C00 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v4, s0, v13 ; 10081A00 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v4, v2, v4 ; 10080902 v_mad_f32 v4, v10, v1, v4 ; D2820004 0412030A v_mul_f32_e32 v5, s0, v12 ; 100A1800 v_mul_f32_e32 v5, v8, v5 ; 100A0B08 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 v_mad_f32 v5, v11, v1, v5 ; D2820005 0416030B v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mad_f32 v0, v9, v1, v0 ; D2820000 04020309 v_mad_f32 v1, v6, v1, v1 ; D2820001 04060306 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v4, v0, v4, v0 ; F8001C0F 00040004 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 34, 0, 0, 0, v5, v4, v6, v6 ; F800022F 06060405 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..2] DCL CONST[4] DCL CONST[6] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, -CONST[0].xxxx 5: BGNLOOP :0 6: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: BRK 9: ENDIF 10: MOV TEMP[3].y, -CONST[0].yyyy 11: BGNLOOP :0 12: FSLT TEMP[5].x, CONST[0].yyyy, TEMP[3].yyyy 13: UIF TEMP[5].xxxx :0 14: BRK 15: ENDIF 16: ADD TEMP[6].xy, CONST[1].xyyy, TEMP[3].xyyy 17: MAD TEMP[7].xy, TEMP[6].xyyy, CONST[6].xyyy, TEMP[0].xyyy 18: MOV TEMP[8].xy, TEMP[7].xyyy 19: MOV TEMP[8].w, IMM[0].xxxx 20: TXB TEMP[9], TEMP[8], SAMP[1], 2D 21: ADD TEMP[2], TEMP[2], TEMP[9] 22: ADD TEMP[10].x, TEMP[3].yyyy, IMM[0].yyyy 23: MOV TEMP[3].y, TEMP[10].xxxx 24: ENDLOOP :0 25: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 26: ENDLOOP :0 27: MUL TEMP[1].w, TEMP[2], CONST[0].wwww 28: MUL TEMP[0].xy, IN[2].xyyy, CONST[4].xyyy 29: MOV TEMP[0].xy, TEMP[0].xyyy 30: MOV TEMP[0].w, IMM[0].xxxx 31: TXB TEMP[0], TEMP[0], SAMP[0], 2D 32: ADD TEMP[2].x, TEMP[0].wwww, -TEMP[1].wwww 33: MUL TEMP[2].x, TEMP[2].xxxx, CONST[0].zzzz 34: MOV_SAT TEMP[2].x, TEMP[2].xxxx 35: LRP TEMP[2], TEMP[2].xxxx, CONST[2], TEMP[0] 36: MUL TEMP[1], TEMP[2], TEMP[0].wwww 37: MOV TEMP[0].w, IMM[0].yyyy 38: MOV TEMP[0].xyz, IN[1].xyzx 39: MUL TEMP[0], TEMP[1], TEMP[0] 40: MUL TEMP[1], TEMP[0], IN[1].wwww 41: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 42: MOV OUT[0], TEMP[1] 43: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %56 = fsub float -0.000000e+00, %24 %57 = fsub float -0.000000e+00, %25 %58 = bitcast <8 x i32> %43 to <32 x i8> %59 = bitcast <4 x i32> %45 to <16 x i8> br label %LOOP LOOP: ; preds = %IF47, %main_body %temp12.0 = phi float [ %56, %main_body ], [ %124, %IF47 ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.1.lcssa, %IF47 ] %60 = fcmp olt float %24, %temp12.0 %61 = sext i1 %60 to i32 %62 = bitcast i32 %61 to float %63 = bitcast float %62 to i32 %64 = icmp ne i32 %63, 0 br i1 %64, label %IF, label %ENDIF IF: ; preds = %LOOP %temp11.0.lcssa = phi float [ %temp11.0, %LOOP ] %65 = fmul float %temp11.0.lcssa, %27 %66 = fmul float %54, %34 %67 = fmul float %55, %35 %68 = bitcast float 0.000000e+00 to i32 %69 = bitcast float %66 to i32 %70 = bitcast float %67 to i32 %71 = insertelement <4 x i32> undef, i32 %68, i32 0 %72 = insertelement <4 x i32> %71, i32 %69, i32 1 %73 = insertelement <4 x i32> %72, i32 %70, i32 2 %74 = insertelement <4 x i32> %73, i32 undef, i32 3 %75 = bitcast <8 x i32> %39 to <32 x i8> %76 = bitcast <4 x i32> %41 to <16 x i8> %77 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = fsub float -0.000000e+00, %65 %83 = fadd float %81, %82 %84 = fmul float %83, %26 %85 = call float @llvm.AMDIL.clamp.(float %84, float 0.000000e+00, float 1.000000e+00) %86 = call float @llvm.AMDGPU.lrp(float %85, float %30, float %78) %87 = call float @llvm.AMDGPU.lrp(float %85, float %31, float %79) %88 = call float @llvm.AMDGPU.lrp(float %85, float %32, float %80) %89 = call float @llvm.AMDGPU.lrp(float %85, float %33, float %81) %90 = fmul float %86, %81 %91 = fmul float %87, %81 %92 = fmul float %88, %81 %93 = fmul float %89, %81 %94 = fmul float %90, %50 %95 = fmul float %91, %51 %96 = fmul float %92, %52 %97 = fmul float %93, 1.000000e+00 %98 = fmul float %94, %53 %99 = fmul float %95, %53 %100 = fmul float %96, %53 %101 = fmul float %97, %53 %102 = fmul float %46, %101 %103 = fadd float %102, %98 %104 = fmul float %47, %101 %105 = fadd float %104, %99 %106 = fmul float %48, %101 %107 = fadd float %106, %100 %108 = fmul float %49, %101 %109 = fadd float %108, %101 %110 = call i32 @llvm.SI.packf16(float %103, float %105) %111 = bitcast i32 %110 to float %112 = call i32 @llvm.SI.packf16(float %107, float %109) %113 = bitcast i32 %112 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %111, float %113, float %111, float %113) ret void ENDIF: ; preds = %LOOP %114 = fadd float %28, %temp12.0 %115 = fmul float %114, %36 %116 = fadd float %115, %54 %117 = bitcast float %116 to i32 %118 = insertelement <4 x i32> , i32 %117, i32 1 br label %LOOP45 LOOP45: ; preds = %ENDIF46, %ENDIF %temp13.0 = phi float [ %57, %ENDIF ], [ %134, %ENDIF46 ] %temp11.1 = phi float [ %temp11.0, %ENDIF ], [ %133, %ENDIF46 ] %119 = fcmp olt float %25, %temp13.0 %120 = sext i1 %119 to i32 %121 = bitcast i32 %120 to float %122 = bitcast float %121 to i32 %123 = icmp ne i32 %122, 0 br i1 %123, label %IF47, label %ENDIF46 IF47: ; preds = %LOOP45 %temp11.1.lcssa = phi float [ %temp11.1, %LOOP45 ] %124 = fadd float %temp12.0, 1.000000e+00 br label %LOOP ENDIF46: ; preds = %LOOP45 %125 = fadd float %29, %temp13.0 %126 = fmul float %125, %37 %127 = fadd float %126, %55 %128 = bitcast float %127 to i32 %129 = insertelement <4 x i32> %118, i32 %128, i32 2 %130 = insertelement <4 x i32> %129, i32 undef, i32 3 %131 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %130, <32 x i8> %58, <16 x i8> %59, i32 2) %132 = extractelement <4 x float> %131, i32 3 %133 = fadd float %temp11.1, %132 %134 = fadd float %temp13.0, 1.000000e+00 br label %LOOP45 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700 v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701 v_interp_p1_f32 v3, v0, 2, 1, [m0] ; C80C0600 v_interp_p2_f32 v3, [v3], v1, 2, 1, [m0] ; C80D0601 v_interp_p1_f32 v8, v0, 1, 1, [m0] ; C8200500 v_interp_p2_f32 v8, [v8], v1, 1, 1, [m0] ; C8210501 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_interp_p1_f32 v11, v0, 1, 0, [m0] ; C82C0100 v_interp_p2_f32 v11, [v11], v1, 1, 0, [m0] ; C82D0101 v_interp_p1_f32 v10, v0, 0, 0, [m0] ; C8280000 v_interp_p2_f32 v10, [v10], v1, 0, 0, [m0] ; C8290001 s_load_dwordx4 s[40:43], s[2:3], 0x0 ; C0940300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[40:43], 0x1 ; C20A2901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v0, s20, v1 ; 3A000214 s_buffer_load_dword s21, s[40:43], 0x0 ; C20AA900 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v1, s21, v1 ; 3A020215 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_buffer_load_dword s22, s[40:43], 0x19 ; C20B2919 s_buffer_load_dword s23, s[40:43], 0x18 ; C20BA918 s_buffer_load_dword s4, s[40:43], 0x11 ; C2022911 s_buffer_load_dword s5, s[40:43], 0x10 ; C202A910 s_buffer_load_dword s3, s[40:43], 0xb ; C201A90B s_buffer_load_dword s0, s[40:43], 0xa ; C200290A s_buffer_load_dword s2, s[40:43], 0x9 ; C2012909 s_buffer_load_dword s1, s[40:43], 0x8 ; C200A908 s_buffer_load_dword s38, s[40:43], 0x5 ; C2132905 s_buffer_load_dword s39, s[40:43], 0x4 ; C213A904 s_buffer_load_dword s6, s[40:43], 0x3 ; C2032903 s_buffer_load_dword s7, s[40:43], 0x2 ; C203A902 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_mov_b64 s[40:41], 0 ; BEA80480 s_mov_b64 s[36:37], s[40:41] ; BEA40428 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v12, v13 ; 7E18030D v_cmp_lt_f32_e32 vcc, s21, v1 ; 7C020215 v_cndmask_b32_e64 v13, 0, -1, vcc ; D200080D 01A98280 v_cmp_eq_i32_e64 s[42:43], v13, 0 ; D104002A 0001010D s_and_saveexec_b64 s[42:43], s[42:43] ; BEAA242A s_xor_b64 s[42:43], exec, s[42:43] ; 89AA2A7E s_cbranch_execz BB0_4 ; BF880000 v_add_f32_e32 v13, s39, v1 ; 061A0227 v_mad_f32 v15, v13, s23, v9 ; D282000F 04242F0D v_mov_b32_e32 v14, 0 ; 7E1C0280 s_mov_b64 s[44:45], s[40:41] ; BEAC0428 v_mov_b32_e32 v18, v0 ; 7E240300 v_mov_b32_e32 v19, v12 ; 7E26030C s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v13, v19 ; 7E1A0313 v_cmp_lt_f32_e32 vcc, s20, v18 ; 7C022414 v_cndmask_b32_e64 v19, 0, -1, vcc ; D2000813 01A98280 v_cmp_eq_i32_e64 s[46:47], v19, 0 ; D104002E 00010113 s_and_saveexec_b64 s[46:47], s[46:47] ; BEAE242E s_xor_b64 s[46:47], exec, s[46:47] ; 89AE2E7E v_add_f32_e32 v20, 1.0, v18 ; 062824F2 v_add_f32_e32 v18, s38, v18 ; 06242426 v_mad_f32 v16, v18, s22, v7 ; D2820010 041C2D12 image_sample_b v18, 8, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[24:31], s[32:35] ; F0940800 0106120E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v19, v18, v13 ; 06261B12 v_mov_b32_e32 v18, v20 ; 7E240314 s_or_b64 exec, exec, s[46:47] ; 88FE2E7E s_or_b64 s[44:45], s[46:47], s[44:45] ; 88AC2C2E s_andn2_b64 exec, exec, s[44:45] ; 8AFE2C7E s_cbranch_execnz BB0_5 ; BF890000 s_or_b64 exec, exec, s[44:45] ; 88FE2C7E v_add_f32_e32 v1, 1.0, v1 ; 060202F2 s_or_b64 exec, exec, s[42:43] ; 88FE2A7E s_or_b64 s[36:37], s[42:43], s[36:37] ; 88A4242A s_andn2_b64 exec, exec, s[36:37] ; 8AFE247E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[36:37] ; 88FE247E v_mul_f32_e32 v15, s4, v7 ; 101E0E04 v_mul_f32_e32 v14, s5, v9 ; 101C1205 v_mov_b32_e32 v13, 0 ; 7E1A0280 image_sample_b v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[8:15], s[16:19] ; F0940F00 00820D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, -v12, s6, v16 ; D2820000 24400D0C v_mul_f32_e32 v0, s7, v0 ; 10000007 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v7, v16, v1 ; 100E0310 v_mad_f32 v7, v0, s3, v7 ; D2820007 041C0700 v_mul_f32_e32 v7, v16, v7 ; 100E0F10 v_mul_f32_e32 v7, v2, v7 ; 100E0F02 v_mul_f32_e32 v9, v14, v1 ; 1012030E v_mad_f32 v9, v0, s2, v9 ; D2820009 04240500 v_mul_f32_e32 v9, v16, v9 ; 10121310 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mul_f32_e32 v8, v2, v8 ; 10101102 v_mad_f32 v8, v11, v7, v8 ; D2820008 04220F0B v_mul_f32_e32 v9, v13, v1 ; 1012030D v_mad_f32 v9, v0, s1, v9 ; D2820009 04240300 v_mul_f32_e32 v9, v16, v9 ; 10121310 v_mul_f32_e32 v6, v6, v9 ; 100C1306 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v10, v7, v6 ; D2820006 041A0F0A v_cvt_pkrtz_f16_f32_e32 v6, v6, v8 ; 5E0C1106 v_mul_f32_e32 v1, v15, v1 ; 1002030F v_mad_f32 v0, v0, s0, v1 ; D2820000 04040100 v_mul_f32_e32 v0, v16, v0 ; 10000110 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mad_f32 v0, v5, v7, v0 ; D2820000 04020F05 v_mad_f32 v1, v4, v7, v7 ; D2820001 041E0F04 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v6, v0, v6, v0 ; F8001C0F 00060006 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 510.0200, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[1].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV OUT[2], IN[1] 13: MOV OUT[1], IN[0] 14: MOV OUT[0], TEMP[0] 15: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float 0x407FE051E0000000, %27 %38 = fadd float %37, 0x3FB99999A0000000 %39 = fptosi float %38 to i32 %40 = bitcast i32 %39 to float %41 = bitcast float %40 to i32 %42 = shl i32 %41, 4 %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %42) %44 = shl i32 %41, 4 %45 = add i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = shl i32 %41, 4 %48 = add i32 %47, 8 %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %48) %50 = shl i32 %41, 4 %51 = add i32 %50, 12 %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %51) %53 = fmul float %33, %43 %54 = fmul float %34, %46 %55 = fadd float %53, %54 %56 = fmul float %35, %49 %57 = fadd float %55, %56 %58 = fmul float %36, %52 %59 = fadd float %57, %58 %60 = fadd float 0x3FF19999A0000000, %37 %61 = fptosi float %60 to i32 %62 = bitcast i32 %61 to float %63 = bitcast float %62 to i32 %64 = shl i32 %63, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = shl i32 %63, 4 %67 = add i32 %66, 4 %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %67) %69 = shl i32 %63, 4 %70 = add i32 %69, 8 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = shl i32 %63, 4 %73 = add i32 %72, 12 %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %73) %75 = fmul float %33, %65 %76 = fmul float %34, %68 %77 = fadd float %75, %76 %78 = fmul float %35, %71 %79 = fadd float %77, %78 %80 = fmul float %36, %74 %81 = fadd float %79, %80 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %81, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 v_mov_b32_e32 v5, 0x3f8ccccd ; 7E0A02FF 3F8CCCCD v_mov_b32_e32 v6, 0x43ff028f ; 7E0C02FF 43FF028F v_mad_f32 v5, v6, v3, v5 ; D2820005 04160706 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_or_b32_e32 v7, 4, v5 ; 380E0A84 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v7, v9 ; 10001307 buffer_load_dword v7, v5, s[0:3], 0 offen ; E0301000 80000705 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v7, v0 ; D2820000 04020F08 v_or_b32_e32 v7, 8, v5 ; 380E0A88 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v10, v7, v0 ; D2820000 04020F0A v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v11, v5, v0 ; D2820000 04020B0B v_mov_b32_e32 v5, 0x3dcccccd ; 7E0A02FF 3DCCCCCD v_mad_f32 v1, v6, v3, v5 ; D2820001 04160706 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v2, 4, v1 ; 38040284 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v9 ; 10041302 buffer_load_dword v3, v1, s[0:3], 0 offen ; E0301000 80000301 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v8, v3, v2 ; D2820002 040A0708 v_or_b32_e32 v3, 8, v1 ; 38060288 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v10, v3, v2 ; D2820002 040A070A v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v11, v1, v2 ; D2820001 040A030B v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 12, 0, 1, 0, v1, v0, v3, v2 ; F80008CF 02030001 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v3, v0, 3, 1, [m0] ; C80C0700 v_interp_p2_f32 v3, [v3], v1, 3, 1, [m0] ; C80D0701 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_cvt_pkrtz_f16_f32_e32 v0, v4, v3 ; 5E000704 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[0] 2: DP4 TEMP[1].x, IN[1], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV TEMP[1].xy, IN[0].xyxx 5: MOV OUT[0], TEMP[0] 6: MOV OUT[1], TEMP[1] 7: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = extractelement <4 x float> %30, i32 1 %33 = extractelement <4 x float> %30, i32 2 %34 = extractelement <4 x float> %30, i32 3 %35 = fmul float %31, %13 %36 = fmul float %32, %14 %37 = fadd float %35, %36 %38 = fmul float %33, %15 %39 = fadd float %37, %38 %40 = fmul float %34, %16 %41 = fadd float %39, %40 %42 = fmul float %31, %17 %43 = fmul float %32, %18 %44 = fadd float %42, %43 %45 = fmul float %33, %19 %46 = fadd float %44, %45 %47 = fmul float %34, %20 %48 = fadd float %46, %47 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %41, float %48, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v5, v5 ; F800020F 05050201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v6 ; D2820000 04180103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v5, v1 ; F80008CF 01050400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[0] 2: DP4 TEMP[1].x, IN[0], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[2] 5: DP4 TEMP[2].x, IN[0], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[0], TEMP[0] 9: MOV OUT[1], TEMP[1] 10: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %33, %13 %38 = fmul float %34, %14 %39 = fadd float %37, %38 %40 = fmul float %35, %15 %41 = fadd float %39, %40 %42 = fmul float %36, %16 %43 = fadd float %41, %42 %44 = fmul float %33, %17 %45 = fmul float %34, %18 %46 = fadd float %44, %45 %47 = fmul float %35, %19 %48 = fadd float %46, %47 %49 = fmul float %36, %20 %50 = fadd float %48, %49 %51 = fmul float %33, %21 %52 = fmul float %34, %22 %53 = fadd float %51, %52 %54 = fmul float %35, %23 %55 = fadd float %53, %54 %56 = fmul float %36, %24 %57 = fadd float %55, %56 %58 = fmul float %33, %25 %59 = fmul float %34, %26 %60 = fadd float %58, %59 %61 = fmul float %35, %27 %62 = fadd float %60, %61 %63 = fmul float %36, %28 %64 = fadd float %62, %63 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %57, float %64, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %43, float %50, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 32, 0, 0, 0, v5, v4, v6, v6 ; F800020F 06060405 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..4] DCL TEMP[0..2], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: DP4 TEMP[1].x, TEMP[0], CONST[1] 3: DP4 TEMP[2].x, TEMP[0], CONST[2] 4: MOV TEMP[1].y, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[0], CONST[3] 6: MOV TEMP[1].z, TEMP[2].xxxx 7: DP4 TEMP[2].x, TEMP[0], CONST[4] 8: MOV TEMP[1].w, TEMP[2].xxxx 9: ADD TEMP[0].x, TEMP[0].wwww, CONST[0].wwww 10: MAD TEMP[0], CONST[0], TEMP[0].xxxx, TEMP[1] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %44 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = bitcast float %48 to i32 %51 = bitcast float %49 to i32 %52 = insertelement <2 x i32> undef, i32 %50, i32 0 %53 = insertelement <2 x i32> %52, i32 %51, i32 1 %54 = bitcast <8 x i32> %45 to <32 x i8> %55 = bitcast <4 x i32> %47 to <16 x i8> %56 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %54, <16 x i8> %55, i32 2) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %57, %28 %62 = fmul float %58, %29 %63 = fadd float %61, %62 %64 = fmul float %59, %30 %65 = fadd float %63, %64 %66 = fmul float %60, %31 %67 = fadd float %65, %66 %68 = fmul float %57, %32 %69 = fmul float %58, %33 %70 = fadd float %68, %69 %71 = fmul float %59, %34 %72 = fadd float %70, %71 %73 = fmul float %60, %35 %74 = fadd float %72, %73 %75 = fmul float %57, %36 %76 = fmul float %58, %37 %77 = fadd float %75, %76 %78 = fmul float %59, %38 %79 = fadd float %77, %78 %80 = fmul float %60, %39 %81 = fadd float %79, %80 %82 = fmul float %57, %40 %83 = fmul float %58, %41 %84 = fadd float %82, %83 %85 = fmul float %59, %42 %86 = fadd float %84, %85 %87 = fmul float %60, %43 %88 = fadd float %86, %87 %89 = fadd float %60, %27 %90 = fmul float %24, %89 %91 = fadd float %90, %67 %92 = fmul float %25, %89 %93 = fadd float %92, %74 %94 = fmul float %26, %89 %95 = fadd float %94, %81 %96 = fmul float %27, %89 %97 = fadd float %96, %88 %98 = call i32 @llvm.SI.packf16(float %91, float %93) %99 = bitcast i32 %98 to float %100 = call i32 @llvm.SI.packf16(float %95, float %97) %101 = bitcast i32 %100 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %99, float %101, float %99, float %101) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430002 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v3 ; 060A0604 v_mad_f32 v4, s4, v5, v4 ; D2820004 04120A04 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v5, v6 ; D2820006 041A0A04 v_cvt_pkrtz_f16_f32_e32 v4, v6, v4 ; 5E080906 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v5, v6 ; D2820006 041A0A04 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s4, v7 ; D2820000 041C0903 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v5, v0 ; D2820000 04020A00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v6 ; 5E000D00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1020.0400, 0.1000} IMM[1] FLT32 { 1.1000, 2.1000, 3.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[0].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: ADD TEMP[2].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ADD TEMP[2].x, IMM[1].yyyy, TEMP[1].xxxx 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 17: ADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 21: MOV TEMP[2].y, TEMP[1].xxxx 22: MOV TEMP[1].xy, TEMP[2].xyxx 23: MOV OUT[1], IN[0] 24: MOV OUT[0], TEMP[0] 25: MOV OUT[2], TEMP[1] 26: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = fmul float 0x408FE051E0000000, %19 %30 = fadd float %29, 0x3FB99999A0000000 %31 = fptosi float %30 to i32 %32 = bitcast i32 %31 to float %33 = bitcast float %32 to i32 %34 = shl i32 %33, 4 %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %34) %36 = shl i32 %33, 4 %37 = add i32 %36, 4 %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %37) %39 = shl i32 %33, 4 %40 = add i32 %39, 8 %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %40) %42 = shl i32 %33, 4 %43 = add i32 %42, 12 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = fmul float %25, %35 %46 = fmul float %26, %38 %47 = fadd float %45, %46 %48 = fmul float %27, %41 %49 = fadd float %47, %48 %50 = fmul float %28, %44 %51 = fadd float %49, %50 %52 = fadd float 0x3FF19999A0000000, %29 %53 = fptosi float %52 to i32 %54 = bitcast i32 %53 to float %55 = bitcast float %54 to i32 %56 = shl i32 %55, 4 %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %56) %58 = shl i32 %55, 4 %59 = add i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = shl i32 %55, 4 %62 = add i32 %61, 8 %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %62) %64 = shl i32 %55, 4 %65 = add i32 %64, 12 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = fmul float %25, %57 %68 = fmul float %26, %60 %69 = fadd float %67, %68 %70 = fmul float %27, %63 %71 = fadd float %69, %70 %72 = fmul float %28, %66 %73 = fadd float %71, %72 %74 = fadd float 0x4000CCCCC0000000, %29 %75 = fptosi float %74 to i32 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = shl i32 %77, 4 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = shl i32 %77, 4 %81 = add i32 %80, 4 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %77, 4 %84 = add i32 %83, 8 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = shl i32 %77, 4 %87 = add i32 %86, 12 %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %87) %89 = fmul float %25, %79 %90 = fmul float %26, %82 %91 = fadd float %89, %90 %92 = fmul float %27, %85 %93 = fadd float %91, %92 %94 = fmul float %28, %88 %95 = fadd float %93, %94 %96 = fadd float 0x4008CCCCC0000000, %29 %97 = fptosi float %96 to i32 %98 = bitcast i32 %97 to float %99 = bitcast float %98 to i32 %100 = shl i32 %99, 4 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = shl i32 %99, 4 %103 = add i32 %102, 4 %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %103) %105 = shl i32 %99, 4 %106 = add i32 %105, 8 %107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %106) %108 = shl i32 %99, 4 %109 = add i32 %108, 12 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %25, %101 %112 = fmul float %26, %104 %113 = fadd float %111, %112 %114 = fmul float %27, %107 %115 = fadd float %113, %114 %116 = fmul float %28, %110 %117 = fadd float %115, %116 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %95, float %117, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %73, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 v_mov_b32_e32 v5, 0x40466666 ; 7E0A02FF 40466666 v_mov_b32_e32 v6, 0x447f028f ; 7E0C02FF 447F028F v_mad_f32 v5, v6, v3, v5 ; D2820005 04160706 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_or_b32_e32 v7, 4, v5 ; 380E0A84 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v7, v9 ; 10001307 buffer_load_dword v7, v5, s[0:3], 0 offen ; E0301000 80000705 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v7, v0 ; D2820000 04020F08 v_or_b32_e32 v7, 8, v5 ; 380E0A88 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v10, v7, v0 ; D2820000 04020F0A v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v11, v5, v0 ; D2820000 04020B0B v_mov_b32_e32 v5, 0x40066666 ; 7E0A02FF 40066666 v_mad_f32 v5, v6, v3, v5 ; D2820005 04160706 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_or_b32_e32 v7, 4, v5 ; 380E0A84 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, v7, v9 ; 100E1307 buffer_load_dword v12, v5, s[0:3], 0 offen ; E0301000 80000C05 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v8, v12, v7 ; D2820007 041E1908 v_or_b32_e32 v12, 8, v5 ; 38180A88 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v10, v12, v7 ; D2820007 041E190A v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v11, v5, v7 ; D2820005 041E0B0B v_mov_b32_e32 v7, 0 ; 7E0E0280 exp 15, 33, 0, 0, 0, v5, v0, v7, v7 ; F800021F 07070005 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0x3f8ccccd ; 7E0002FF 3F8CCCCD v_mad_f32 v0, v6, v3, v0 ; D2820000 04020706 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_or_b32_e32 v5, 4, v0 ; 380A0084 buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, v5, v9 ; 100A1305 buffer_load_dword v12, v0, s[0:3], 0 offen ; E0301000 80000C00 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v8, v12, v5 ; D2820005 04161908 v_or_b32_e32 v12, 8, v0 ; 38180088 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v10, v12, v5 ; D2820005 0416190A v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v11, v0, v5 ; D2820000 0416010B v_mov_b32_e32 v5, 0x3dcccccd ; 7E0A02FF 3DCCCCCD v_mad_f32 v1, v6, v3, v5 ; D2820001 04160706 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v2, 4, v1 ; 38040284 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v9 ; 10041302 buffer_load_dword v3, v1, s[0:3], 0 offen ; E0301000 80000301 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v8, v3, v2 ; D2820002 040A0708 v_or_b32_e32 v3, 8, v1 ; 38060288 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v10, v3, v2 ; D2820002 040A070A v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v11, v1, v2 ; D2820001 040A030B v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v1, v0, v7, v2 ; F80008CF 02070001 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = bitcast <8 x i32> %23 to <32 x i8> %34 = bitcast <4 x i32> %25 to <16 x i8> %35 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %33, <16 x i8> %34, i32 2) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = extractelement <4 x float> %35, i32 3 %40 = fmul float %39, %26 %41 = call i32 @llvm.SI.packf16(float %36, float %37) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %38, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v6, v2, v3 ; 5E0C0702 v_interp_p1_f32 v7, v0, 3, 0, [m0] ; C81C0300 v_interp_p2_f32 v7, [v7], v1, 3, 0, [m0] ; C81D0301 v_mul_f32_e32 v0, v7, v5 ; 10000B07 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v6, v0, v6, v0 ; F8001C0F 00060006 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 34, 0, 0, 0, v5, v4, v6, v6 ; F800022F 06060405 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL CONST[2] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, -CONST[0].xxxx 5: BGNLOOP :0 6: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: BRK 9: ENDIF 10: MOV TEMP[3].y, -CONST[0].yyyy 11: BGNLOOP :0 12: FSLT TEMP[5].x, CONST[0].yyyy, TEMP[3].yyyy 13: UIF TEMP[5].xxxx :0 14: BRK 15: ENDIF 16: MAD TEMP[6].xy, TEMP[3].xyyy, CONST[2].xyyy, TEMP[0].xyyy 17: MOV TEMP[7].xy, TEMP[6].xyyy 18: MOV TEMP[7].w, IMM[0].xxxx 19: TXB TEMP[8], TEMP[7], SAMP[0], 2D 20: ADD TEMP[2], TEMP[2], TEMP[8] 21: ADD TEMP[9].x, TEMP[3].yyyy, IMM[0].yyyy 22: MOV TEMP[3].y, TEMP[9].xxxx 23: ENDLOOP :0 24: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 25: ENDLOOP :0 26: MUL TEMP[1], TEMP[2], CONST[0].wwww 27: MOV TEMP[0].w, IMM[0].yyyy 28: MOV TEMP[0].xyz, IN[1].xyzx 29: MUL TEMP[0], TEMP[1], TEMP[0] 30: MUL TEMP[1], TEMP[0], IN[1].wwww 31: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 32: MOV OUT[0], TEMP[1] 33: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %30 = load <8 x i32> addrspace(2)* %29, !tbaa !0 %31 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %32 = load <4 x i32> addrspace(2)* %31, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %43 = fsub float -0.000000e+00, %24 %44 = fsub float -0.000000e+00, %25 %45 = bitcast <8 x i32> %30 to <32 x i8> %46 = bitcast <4 x i32> %32 to <16 x i8> br label %LOOP LOOP: ; preds = %IF43, %main_body %temp10.0 = phi float [ 0.000000e+00, %main_body ], [ %temp10.1.lcssa, %IF43 ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.1.lcssa, %IF43 ] %temp12.0 = phi float [ %43, %main_body ], [ %85, %IF43 ] %temp9.0 = phi float [ 0.000000e+00, %main_body ], [ %temp9.1.lcssa, %IF43 ] %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %temp8.1.lcssa, %IF43 ] %47 = fcmp olt float %24, %temp12.0 %48 = sext i1 %47 to i32 %49 = bitcast i32 %48 to float %50 = bitcast float %49 to i32 %51 = icmp ne i32 %50, 0 br i1 %51, label %IF, label %ENDIF IF: ; preds = %LOOP %temp8.0.lcssa = phi float [ %temp8.0, %LOOP ] %temp9.0.lcssa = phi float [ %temp9.0, %LOOP ] %temp11.0.lcssa = phi float [ %temp11.0, %LOOP ] %temp10.0.lcssa = phi float [ %temp10.0, %LOOP ] %52 = fmul float %temp8.0.lcssa, %26 %53 = fmul float %temp9.0.lcssa, %26 %54 = fmul float %temp10.0.lcssa, %26 %55 = fmul float %temp11.0.lcssa, %26 %56 = fmul float %52, %37 %57 = fmul float %53, %38 %58 = fmul float %54, %39 %59 = fmul float %55, 1.000000e+00 %60 = fmul float %56, %40 %61 = fmul float %57, %40 %62 = fmul float %58, %40 %63 = fmul float %59, %40 %64 = fmul float %33, %63 %65 = fadd float %64, %60 %66 = fmul float %34, %63 %67 = fadd float %66, %61 %68 = fmul float %35, %63 %69 = fadd float %68, %62 %70 = fmul float %36, %63 %71 = fadd float %70, %63 %72 = call i32 @llvm.SI.packf16(float %65, float %67) %73 = bitcast i32 %72 to float %74 = call i32 @llvm.SI.packf16(float %69, float %71) %75 = bitcast i32 %74 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %73, float %75, float %73, float %75) ret void ENDIF: ; preds = %LOOP %76 = fmul float %temp12.0, %27 %77 = fadd float %76, %41 %78 = bitcast float %77 to i32 %79 = insertelement <4 x i32> , i32 %78, i32 1 br label %LOOP41 LOOP41: ; preds = %ENDIF42, %ENDIF %temp10.1 = phi float [ %temp10.0, %ENDIF ], [ %98, %ENDIF42 ] %temp11.1 = phi float [ %temp11.0, %ENDIF ], [ %99, %ENDIF42 ] %temp13.0 = phi float [ %44, %ENDIF ], [ %100, %ENDIF42 ] %temp9.1 = phi float [ %temp9.0, %ENDIF ], [ %97, %ENDIF42 ] %temp8.1 = phi float [ %temp8.0, %ENDIF ], [ %96, %ENDIF42 ] %80 = fcmp olt float %25, %temp13.0 %81 = sext i1 %80 to i32 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = icmp ne i32 %83, 0 br i1 %84, label %IF43, label %ENDIF42 IF43: ; preds = %LOOP41 %temp8.1.lcssa = phi float [ %temp8.1, %LOOP41 ] %temp9.1.lcssa = phi float [ %temp9.1, %LOOP41 ] %temp11.1.lcssa = phi float [ %temp11.1, %LOOP41 ] %temp10.1.lcssa = phi float [ %temp10.1, %LOOP41 ] %85 = fadd float %temp12.0, 1.000000e+00 br label %LOOP ENDIF42: ; preds = %LOOP41 %86 = fmul float %temp13.0, %28 %87 = fadd float %86, %42 %88 = bitcast float %87 to i32 %89 = insertelement <4 x i32> %79, i32 %88, i32 2 %90 = insertelement <4 x i32> %89, i32 undef, i32 3 %91 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %90, <32 x i8> %45, <16 x i8> %46, i32 2) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = extractelement <4 x float> %91, i32 3 %96 = fadd float %temp8.1, %92 %97 = fadd float %temp9.1, %93 %98 = fadd float %temp10.1, %94 %99 = fadd float %temp11.1, %95 %100 = fadd float %temp13.0, 1.000000e+00 br label %LOOP41 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 1, 2, [m0] ; C8100900 v_interp_p2_f32 v4, [v4], v1, 1, 2, [m0] ; C8110901 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700 v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701 v_interp_p1_f32 v3, v0, 2, 1, [m0] ; C80C0600 v_interp_p2_f32 v3, [v3], v1, 2, 1, [m0] ; C80D0601 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v9, v0, 2, 0, [m0] ; C8240200 v_interp_p2_f32 v9, [v9], v1, 2, 0, [m0] ; C8250201 v_interp_p1_f32 v10, v0, 1, 0, [m0] ; C8280100 v_interp_p2_f32 v10, [v10], v1, 1, 0, [m0] ; C8290101 v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501 v_mov_b32_e32 v0, 0x80000000 ; 7E0002FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v12, s1, v0 ; 3A180001 s_buffer_load_dword s2, s[20:23], 0x0 ; C2011500 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v13, s2, v0 ; 3A1A0002 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_buffer_load_dword s3, s[20:23], 0x9 ; C2019509 s_buffer_load_dword s18, s[20:23], 0x8 ; C2091508 s_buffer_load_dword s0, s[20:23], 0x3 ; C2001503 v_mov_b32_e32 v16, 0 ; 7E200280 s_mov_b64 s[20:21], 0 ; BE940480 s_mov_b64 s[16:17], s[20:21] ; BE900414 v_mov_b32_e32 v17, v16 ; 7E220310 v_mov_b32_e32 v18, v16 ; 7E240310 v_mov_b32_e32 v19, v16 ; 7E260310 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, v19 ; 7E020313 v_mov_b32_e32 v14, v18 ; 7E1C0312 v_mov_b32_e32 v15, v17 ; 7E1E0311 v_mov_b32_e32 v0, v16 ; 7E000310 v_cmp_lt_f32_e32 vcc, s2, v13 ; 7C021A02 v_cndmask_b32_e64 v16, 0, -1, vcc ; D2000010 01A98280 v_cmp_eq_i32_e64 s[22:23], v16, 0 ; D1040016 00010110 s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_4 ; BF880000 v_mad_f32 v21, v13, s18, v7 ; D2820015 041C250D v_mov_b32_e32 v20, 0 ; 7E280280 s_mov_b64 s[24:25], s[20:21] ; BE980414 v_mov_b32_e32 v26, v0 ; 7E340300 v_mov_b32_e32 v25, v15 ; 7E32030F v_mov_b32_e32 v24, v12 ; 7E30030C v_mov_b32_e32 v27, v14 ; 7E36030E v_mov_b32_e32 v28, v1 ; 7E380301 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v19, v28 ; 7E26031C v_mov_b32_e32 v18, v27 ; 7E24031B v_mov_b32_e32 v17, v25 ; 7E220319 v_mov_b32_e32 v16, v26 ; 7E20031A v_cmp_lt_f32_e32 vcc, s1, v24 ; 7C023001 v_cndmask_b32_e64 v25, 0, -1, vcc ; D2000819 01A98280 v_cmp_eq_i32_e64 s[26:27], v25, 0 ; D104001A 00010119 s_and_saveexec_b64 s[26:27], s[26:27] ; BE9A241A s_xor_b64 s[26:27], exec, s[26:27] ; 899A1A7E v_mad_f32 v22, v24, s3, v4 ; D2820016 04100718 image_sample_b v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[8:15], s[4:7] ; F0940F00 00221C14 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v25, v31, v17 ; 0632231F v_add_f32_e32 v26, v30, v16 ; 0634211E v_add_f32_e32 v27, v29, v18 ; 0636251D v_add_f32_e32 v28, v28, v19 ; 0638271C v_add_f32_e32 v24, 1.0, v24 ; 063030F2 s_or_b64 exec, exec, s[26:27] ; 88FE1A7E s_or_b64 s[24:25], s[26:27], s[24:25] ; 8898181A s_andn2_b64 exec, exec, s[24:25] ; 8AFE187E s_cbranch_execnz BB0_5 ; BF890000 s_or_b64 exec, exec, s[24:25] ; 88FE187E v_add_f32_e32 v13, 1.0, v13 ; 061A1AF2 s_or_b64 exec, exec, s[22:23] ; 88FE167E s_or_b64 s[16:17], s[22:23], s[16:17] ; 88901016 s_andn2_b64 exec, exec, s[16:17] ; 8AFE107E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[16:17] ; 88FE107E v_mul_f32_e32 v4, s0, v15 ; 10081E00 v_mul_f32_e32 v4, v2, v4 ; 10080902 v_mul_f32_e32 v7, s0, v14 ; 100E1C00 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v10, v4, v6 ; D2820006 041A090A v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mad_f32 v1, v11, v4, v1 ; D2820001 0406090B v_cvt_pkrtz_f16_f32_e32 v1, v1, v6 ; 5E020D01 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mad_f32 v0, v9, v4, v0 ; D2820000 04020909 v_mad_f32 v2, v5, v4, v4 ; D2820002 04120905 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 34, 0, 0, 0, v5, v4, v6, v6 ; F800022F 06060405 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[1].xyzx 2: MOV TEMP[1].xy, IN[2].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[0], TEMP[1], TEMP[0] 5: MUL TEMP[0], TEMP[0], IN[1].wwww 6: MAD TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %36 = bitcast float %34 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %23 to <32 x i8> %41 = bitcast <4 x i32> %25 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = fmul float %43, %30 %48 = fmul float %44, %31 %49 = fmul float %45, %32 %50 = fmul float %46, 1.000000e+00 %51 = fmul float %47, %33 %52 = fmul float %48, %33 %53 = fmul float %49, %33 %54 = fmul float %50, %33 %55 = fmul float %26, %54 %56 = fadd float %55, %51 %57 = fmul float %27, %54 %58 = fadd float %57, %52 %59 = fmul float %28, %54 %60 = fadd float %59, %53 %61 = fmul float %29, %54 %62 = fadd float %61, %54 %63 = call i32 @llvm.SI.packf16(float %56, float %58) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %60, float %62) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 2, [m0] ; C80C0900 v_interp_p2_f32 v3, [v3], v1, 1, 2, [m0] ; C80D0901 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, v6, v5 ; 100E0B06 v_interp_p1_f32 v8, v0, 1, 1, [m0] ; C8200500 v_interp_p2_f32 v8, [v8], v1, 1, 1, [m0] ; C8210501 v_mul_f32_e32 v8, v8, v3 ; 10100708 v_mul_f32_e32 v8, v6, v8 ; 10101106 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 v_mad_f32 v8, v9, v7, v8 ; D2820008 04220F09 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_mul_f32_e32 v9, v9, v2 ; 10120509 v_mul_f32_e32 v9, v6, v9 ; 10121306 v_interp_p1_f32 v10, v0, 0, 0, [m0] ; C8280000 v_interp_p2_f32 v10, [v10], v1, 0, 0, [m0] ; C8290001 v_mad_f32 v9, v10, v7, v9 ; D2820009 04260F0A v_cvt_pkrtz_f16_f32_e32 v8, v9, v8 ; 5E101109 v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600 v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601 v_mul_f32_e32 v2, v9, v4 ; 10040909 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_mad_f32 v2, v3, v7, v2 ; D2820002 040A0F03 v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300 v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301 v_mad_f32 v0, v3, v7, v7 ; D2820000 041E0F03 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v8, v0, v8, v0 ; F8001C0F 00080008 s_endpgm ; BF810000 [0428/202613:ERROR:renderer_main.cc(212)] Running without renderer sandbox VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IN[2].xxxx, IMM[0].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV TEMP[1].xy, IN[0].xyxx 13: MOV OUT[0], TEMP[0] 14: MOV OUT[1], TEMP[1] 15: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 %27 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = fmul float %31, 2.000000e+00 %33 = fadd float %32, 0x3FB99999A0000000 %34 = fptosi float %33 to i32 %35 = bitcast i32 %34 to float %36 = bitcast float %35 to i32 %37 = shl i32 %36, 4 %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %37) %39 = shl i32 %36, 4 %40 = add i32 %39, 4 %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %40) %42 = shl i32 %36, 4 %43 = add i32 %42, 8 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = shl i32 %36, 4 %46 = add i32 %45, 12 %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %46) %48 = fmul float %23, %38 %49 = fmul float %24, %41 %50 = fadd float %48, %49 %51 = fmul float %25, %44 %52 = fadd float %50, %51 %53 = fmul float %26, %47 %54 = fadd float %52, %53 %55 = fadd float 0x3FF19999A0000000, %32 %56 = fptosi float %55 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = shl i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = shl i32 %58, 4 %62 = add i32 %61, 4 %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %62) %64 = shl i32 %58, 4 %65 = add i32 %64, 8 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = shl i32 %58, 4 %68 = add i32 %67, 12 %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %68) %70 = fmul float %23, %60 %71 = fmul float %24, %63 %72 = fadd float %70, %71 %73 = fmul float %25, %66 %74 = fadd float %72, %73 %75 = fmul float %26, %69 %76 = fadd float %74, %75 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %54, float %76, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v5, v5 ; F800020F 05050201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v6, 0x3f8ccccd ; 7E0C02FF 3F8CCCCD s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, 2.0, v1, v6 ; D2820006 041A02F4 v_cvt_i32_f32_e32 v6, v6 ; 7E0C1106 v_lshlrev_b32_e32 v6, 4, v6 ; 340C0C84 v_or_b32_e32 v7, 4, v6 ; 380E0C84 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v7, v9 ; 10001307 buffer_load_dword v7, v6, s[0:3], 0 offen ; E0301000 80000706 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v7, v0 ; D2820000 04020F08 v_or_b32_e32 v7, 8, v6 ; 380E0C88 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v10, v7, v0 ; D2820000 04020F0A v_or_b32_e32 v6, 12, v6 ; 380C0C8C buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v11, v6, v0 ; D2820000 04020D0B v_mov_b32_e32 v6, 0x3dcccccd ; 7E0C02FF 3DCCCCCD v_mad_f32 v1, 2.0, v1, v6 ; D2820001 041A02F4 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v2, 4, v1 ; 38040284 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v9 ; 10041302 buffer_load_dword v3, v1, s[0:3], 0 offen ; E0301000 80000301 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v8, v3, v2 ; D2820002 040A0708 v_or_b32_e32 v3, 8, v1 ; 38060288 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v10, v3, v2 ; D2820002 040A070A v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v11, v1, v2 ; D2820001 040A030B v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v1, v0, v5, v2 ; F80008CF 02050001 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..7] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[2] 1: DP4 TEMP[1].x, IN[1], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[6] 8: DP4 TEMP[2].x, IN[1], CONST[7] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[2], CONST[0] 13: MOV OUT[3], CONST[1] 14: MOV OUT[0], TEMP[0] 15: MOV OUT[4], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %57, %21 %62 = fmul float %58, %22 %63 = fadd float %61, %62 %64 = fmul float %59, %23 %65 = fadd float %63, %64 %66 = fmul float %60, %24 %67 = fadd float %65, %66 %68 = fmul float %57, %25 %69 = fmul float %58, %26 %70 = fadd float %68, %69 %71 = fmul float %59, %27 %72 = fadd float %70, %71 %73 = fmul float %60, %28 %74 = fadd float %72, %73 %75 = fmul float %57, %29 %76 = fmul float %58, %30 %77 = fadd float %75, %76 %78 = fmul float %59, %31 %79 = fadd float %77, %78 %80 = fmul float %60, %32 %81 = fadd float %79, %80 %82 = fmul float %57, %33 %83 = fmul float %58, %34 %84 = fadd float %82, %83 %85 = fmul float %59, %35 %86 = fadd float %84, %85 %87 = fmul float %60, %36 %88 = fadd float %86, %87 %89 = fmul float %57, %37 %90 = fmul float %58, %38 %91 = fadd float %89, %90 %92 = fmul float %59, %39 %93 = fadd float %91, %92 %94 = fmul float %60, %40 %95 = fadd float %93, %94 %96 = fmul float %57, %41 %97 = fmul float %58, %42 %98 = fadd float %96, %97 %99 = fmul float %59, %43 %100 = fadd float %98, %99 %101 = fmul float %60, %44 %102 = fadd float %100, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %102, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %74, float %81, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 35, 0, 0, 0, v5, v4, v6, v6 ; F800023F 06060405 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = bitcast <8 x i32> %23 to <32 x i8> %42 = bitcast <4 x i32> %25 to <16 x i8> %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %41, <16 x i8> %42, i32 2) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = fmul float %44, %31 %49 = fadd float %48, %27 %50 = fmul float %45, %32 %51 = fadd float %50, %28 %52 = fmul float %46, %33 %53 = fadd float %52, %29 %54 = fmul float %47, %34 %55 = fadd float %54, %30 %56 = fmul float %55, %26 %57 = call i32 @llvm.SI.packf16(float %49, float %51) %58 = bitcast i32 %57 to float %59 = call i32 @llvm.SI.packf16(float %53, float %56) %60 = bitcast i32 %59 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %58, float %60, float %58, float %60) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v3, v7, v6 ; D2820006 041A0F03 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_mad_f32 v7, v2, v8, v7 ; D2820007 041E1102 v_cvt_pkrtz_f16_f32_e32 v6, v7, v6 ; 5E0C0D07 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_mad_f32 v7, v4, v8, v7 ; D2820007 041E1104 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 3, 2, [m0] ; C8240B00 v_interp_p2_f32 v9, [v9], v1, 3, 2, [m0] ; C8250B01 v_mad_f32 v2, v5, v9, v8 ; D2820002 04221305 v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300 v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301 v_mul_f32_e32 v0, v3, v2 ; 10000503 v_cvt_pkrtz_f16_f32_e32 v0, v7, v0 ; 5E000107 exp 15, 0, 1, 1, 1, v6, v0, v6, v0 ; F8001C0F 00060006 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: DP4 TEMP[0].x, IN[0], CONST[0] 1: DP4 TEMP[1].x, IN[0], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[0], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[0], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %33, %13 %38 = fmul float %34, %14 %39 = fadd float %37, %38 %40 = fmul float %35, %15 %41 = fadd float %39, %40 %42 = fmul float %36, %16 %43 = fadd float %41, %42 %44 = fmul float %33, %17 %45 = fmul float %34, %18 %46 = fadd float %44, %45 %47 = fmul float %35, %19 %48 = fadd float %46, %47 %49 = fmul float %36, %20 %50 = fadd float %48, %49 %51 = fmul float %33, %21 %52 = fmul float %34, %22 %53 = fadd float %51, %52 %54 = fmul float %35, %23 %55 = fadd float %53, %54 %56 = fmul float %36, %24 %57 = fadd float %55, %56 %58 = fmul float %33, %25 %59 = fmul float %34, %26 %60 = fadd float %58, %59 %61 = fmul float %35, %27 %62 = fadd float %60, %61 %63 = fmul float %36, %28 %64 = fadd float %62, %63 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %43, float %50, float %57, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s4 ; 7E000204 v_cvt_pkrtz_f16_f32_e32 v0, s5, v0 ; 5E000005 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_cvt_pkrtz_f16_f32_e32 v1, s0, v1 ; 5E020200 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..5] DCL TEMP[0..1], LOCAL 0: DP4 TEMP[0].x, IN[2], CONST[2] 1: DP4 TEMP[1].x, IN[2], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[2], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[2], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MOV OUT[2], IN[1] 8: MOV OUT[3], CONST[0] 9: MOV OUT[1], IN[0] 10: MOV OUT[4], CONST[1] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %57, %21 %62 = fmul float %58, %22 %63 = fadd float %61, %62 %64 = fmul float %59, %23 %65 = fadd float %63, %64 %66 = fmul float %60, %24 %67 = fadd float %65, %66 %68 = fmul float %57, %25 %69 = fmul float %58, %26 %70 = fadd float %68, %69 %71 = fmul float %59, %27 %72 = fadd float %70, %71 %73 = fmul float %60, %28 %74 = fadd float %72, %73 %75 = fmul float %57, %29 %76 = fmul float %58, %30 %77 = fadd float %75, %76 %78 = fmul float %59, %31 %79 = fadd float %77, %78 %80 = fmul float %60, %32 %81 = fadd float %79, %80 %82 = fmul float %57, %33 %83 = fmul float %58, %34 %84 = fadd float %82, %83 %85 = fmul float %59, %35 %86 = fadd float %84, %85 %87 = fmul float %60, %36 %88 = fadd float %86, %87 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %74, float %81, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 35, 0, 0, 0, v1, v2, v3, v4 ; F800023F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %35 = fmul float %22, %31 %36 = fadd float %35, %27 %37 = fmul float %23, %32 %38 = fadd float %37, %28 %39 = fmul float %24, %33 %40 = fadd float %39, %29 %41 = fmul float %25, %34 %42 = fadd float %41, %30 %43 = fmul float %42, %26 %44 = call i32 @llvm.SI.packf16(float %36, float %38) %45 = bitcast i32 %44 to float %46 = call i32 @llvm.SI.packf16(float %40, float %43) %47 = bitcast i32 %46 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %45, float %47, float %45, float %47) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 2, [m0] ; C8080900 v_interp_p2_f32 v2, [v2], v1, 1, 2, [m0] ; C8090901 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_mad_f32 v2, v4, v3, v2 ; D2820002 040A0704 v_interp_p1_f32 v3, v0, 0, 2, [m0] ; C80C0800 v_interp_p2_f32 v3, [v3], v1, 0, 2, [m0] ; C80D0801 v_interp_p1_f32 v4, v0, 0, 3, [m0] ; C8100C00 v_interp_p2_f32 v4, [v4], v1, 0, 3, [m0] ; C8110C01 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 v_mad_f32 v3, v5, v4, v3 ; D2820003 040E0905 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v3, v0, 2, 2, [m0] ; C80C0A00 v_interp_p2_f32 v3, [v3], v1, 2, 2, [m0] ; C80D0A01 v_interp_p1_f32 v4, v0, 2, 3, [m0] ; C8100E00 v_interp_p2_f32 v4, [v4], v1, 2, 3, [m0] ; C8110E01 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_mad_f32 v3, v5, v4, v3 ; D2820003 040E0905 v_interp_p1_f32 v4, v0, 3, 2, [m0] ; C8100B00 v_interp_p2_f32 v4, [v4], v1, 3, 2, [m0] ; C8110B01 v_interp_p1_f32 v5, v0, 3, 3, [m0] ; C8140F00 v_interp_p2_f32 v5, [v5], v1, 3, 3, [m0] ; C8150F01 v_interp_p1_f32 v6, v0, 3, 0, [m0] ; C8180300 v_interp_p2_f32 v6, [v6], v1, 3, 0, [m0] ; C8190301 v_mad_f32 v4, v6, v5, v4 ; D2820004 04120B06 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 v_mul_f32_e32 v0, v5, v4 ; 10000905 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[2], CONST[2] 1: DP4 TEMP[1].x, IN[2], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[2], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[2], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MAD TEMP[1], IN[0], CONST[1], CONST[0] 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV OUT[1], TEMP[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[2], TEMP[2] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fmul float %55, %21 %60 = fmul float %56, %22 %61 = fadd float %59, %60 %62 = fmul float %57, %23 %63 = fadd float %61, %62 %64 = fmul float %58, %24 %65 = fadd float %63, %64 %66 = fmul float %55, %25 %67 = fmul float %56, %26 %68 = fadd float %66, %67 %69 = fmul float %57, %27 %70 = fadd float %68, %69 %71 = fmul float %58, %28 %72 = fadd float %70, %71 %73 = fmul float %55, %29 %74 = fmul float %56, %30 %75 = fadd float %73, %74 %76 = fmul float %57, %31 %77 = fadd float %75, %76 %78 = fmul float %58, %32 %79 = fadd float %77, %78 %80 = fmul float %55, %33 %81 = fmul float %56, %34 %82 = fadd float %80, %81 %83 = fmul float %57, %35 %84 = fadd float %82, %83 %85 = fmul float %58, %36 %86 = fadd float %84, %85 %87 = fmul float %41, %17 %88 = fadd float %87, %13 %89 = fmul float %42, %18 %90 = fadd float %89, %14 %91 = fmul float %43, %19 %92 = fadd float %91, %15 %93 = fmul float %44, %20 %94 = fadd float %93, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %90, float %92, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %72, float %79, float %86) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v5, s5, v4, v5 ; D2820005 04160805 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v6, s5, v3, v6 ; D2820006 041A0605 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v7, s5, v2, v7 ; D2820007 041E0405 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s4 ; 7E100204 v_mad_f32 v1, s5, v1, v8 ; D2820001 04220205 exp 15, 32, 0, 0, 0, v1, v7, v6, v5 ; F800020F 05060701 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v5, v5 ; F800021F 05050201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 3 %40 = fmul float %29, %39 %41 = call i32 @llvm.SI.packf16(float %26, float %27) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %28, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v3, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800800 00010303 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_cvt_pkrtz_f16_f32_e32 v0, v4, v3 ; 5E000704 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[0] 1: DP4 TEMP[1].x, IN[1], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[4] 8: DP4 TEMP[2].x, IN[1], CONST[5] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[0], TEMP[0] 13: MOV OUT[2], TEMP[1] 14: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %13 %54 = fmul float %50, %14 %55 = fadd float %53, %54 %56 = fmul float %51, %15 %57 = fadd float %55, %56 %58 = fmul float %52, %16 %59 = fadd float %57, %58 %60 = fmul float %49, %17 %61 = fmul float %50, %18 %62 = fadd float %60, %61 %63 = fmul float %51, %19 %64 = fadd float %62, %63 %65 = fmul float %52, %20 %66 = fadd float %64, %65 %67 = fmul float %49, %21 %68 = fmul float %50, %22 %69 = fadd float %67, %68 %70 = fmul float %51, %23 %71 = fadd float %69, %70 %72 = fmul float %52, %24 %73 = fadd float %71, %72 %74 = fmul float %49, %25 %75 = fmul float %50, %26 %76 = fadd float %74, %75 %77 = fmul float %51, %27 %78 = fadd float %76, %77 %79 = fmul float %52, %28 %80 = fadd float %78, %79 %81 = fmul float %49, %29 %82 = fmul float %50, %30 %83 = fadd float %81, %82 %84 = fmul float %51, %31 %85 = fadd float %83, %84 %86 = fmul float %52, %32 %87 = fadd float %85, %86 %88 = fmul float %49, %33 %89 = fmul float %50, %34 %90 = fadd float %88, %89 %91 = fmul float %51, %35 %92 = fadd float %90, %91 %93 = fmul float %52, %36 %94 = fadd float %92, %93 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %87, float %94, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float %73, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 33, 0, 0, 0, v5, v4, v6, v6 ; F800021F 06060405 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = bitcast <8 x i32> %23 to <32 x i8> %34 = bitcast <4 x i32> %25 to <16 x i8> %35 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %33, <16 x i8> %34, i32 2) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = extractelement <4 x float> %35, i32 3 %40 = fmul float %39, %26 %41 = call i32 @llvm.SI.packf16(float %36, float %37) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %38, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v6, v2, v3 ; 5E0C0702 v_interp_p1_f32 v7, v0, 3, 0, [m0] ; C81C0300 v_interp_p2_f32 v7, [v7], v1, 3, 0, [m0] ; C81D0301 v_mul_f32_e32 v0, v7, v5 ; 10000B07 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v6, v0, v6, v0 ; F8001C0F 00060006 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..7] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[2] 1: DP4 TEMP[1].x, IN[1], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[6] 8: DP4 TEMP[2].x, IN[1], CONST[7] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[2], CONST[0] 13: MOV OUT[3], CONST[1] 14: MOV OUT[0], TEMP[0] 15: MOV OUT[4], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %57, %21 %62 = fmul float %58, %22 %63 = fadd float %61, %62 %64 = fmul float %59, %23 %65 = fadd float %63, %64 %66 = fmul float %60, %24 %67 = fadd float %65, %66 %68 = fmul float %57, %25 %69 = fmul float %58, %26 %70 = fadd float %68, %69 %71 = fmul float %59, %27 %72 = fadd float %70, %71 %73 = fmul float %60, %28 %74 = fadd float %72, %73 %75 = fmul float %57, %29 %76 = fmul float %58, %30 %77 = fadd float %75, %76 %78 = fmul float %59, %31 %79 = fadd float %77, %78 %80 = fmul float %60, %32 %81 = fadd float %79, %80 %82 = fmul float %57, %33 %83 = fmul float %58, %34 %84 = fadd float %82, %83 %85 = fmul float %59, %35 %86 = fadd float %84, %85 %87 = fmul float %60, %36 %88 = fadd float %86, %87 %89 = fmul float %57, %37 %90 = fmul float %58, %38 %91 = fadd float %89, %90 %92 = fmul float %59, %39 %93 = fadd float %91, %92 %94 = fmul float %60, %40 %95 = fadd float %93, %94 %96 = fmul float %57, %41 %97 = fmul float %58, %42 %98 = fadd float %96, %97 %99 = fmul float %59, %43 %100 = fadd float %98, %99 %101 = fmul float %60, %44 %102 = fadd float %100, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %102, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %74, float %81, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 35, 0, 0, 0, v5, v4, v6, v6 ; F800023F 06060405 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { -0.5020, 1.5960, -0.8130, 0.0000} IMM[1] FLT32 { -0.0627, 1.1640, 1.0000, -0.3920} IMM[2] FLT32 { 0.0000, -0.3920, 2.0170, 0.0000} 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[2], 2D 2: ADD TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[3].xyyy 4: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 5: ADD TEMP[1].x, TEMP[1].wwww, IMM[1].xxxx 6: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 7: MAD TEMP[0], TEMP[0].xxxx, IMM[0].yzww, TEMP[1].xxxx 8: MOV TEMP[1].xy, IN[3].xyyy 9: TEX TEMP[1].w, TEMP[1], SAMP[1], 2D 10: ADD TEMP[1].x, TEMP[1].wwww, IMM[0].xxxx 11: MAD TEMP[1].xyz, TEMP[1].xxxx, IMM[2].xyzx, TEMP[0] 12: MOV TEMP[0].xyz, TEMP[1].xyzx 13: MOV TEMP[0].w, IMM[1].zzzz 14: MAD TEMP[0], TEMP[0], IN[2], IN[1] 15: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 16: MOV TEMP[0].w, TEMP[1].xxxx 17: MOV OUT[0], TEMP[0] 18: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %45 = bitcast float %43 to i32 %46 = bitcast float %44 to i32 %47 = insertelement <2 x i32> undef, i32 %45, i32 0 %48 = insertelement <2 x i32> %47, i32 %46, i32 1 %49 = bitcast <8 x i32> %31 to <32 x i8> %50 = bitcast <4 x i32> %33 to <16 x i8> %51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %48, <32 x i8> %49, <16 x i8> %50, i32 2) %52 = extractelement <4 x float> %51, i32 3 %53 = fadd float %52, 0xBFE0101020000000 %54 = bitcast float %43 to i32 %55 = bitcast float %44 to i32 %56 = insertelement <2 x i32> undef, i32 %54, i32 0 %57 = insertelement <2 x i32> %56, i32 %55, i32 1 %58 = bitcast <8 x i32> %23 to <32 x i8> %59 = bitcast <4 x i32> %25 to <16 x i8> %60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %57, <32 x i8> %58, <16 x i8> %59, i32 2) %61 = extractelement <4 x float> %60, i32 3 %62 = fadd float %61, 0xBFB0101020000000 %63 = fmul float %62, 0x3FF29FBE80000000 %64 = fmul float %53, 0x3FF9893740000000 %65 = fadd float %64, %63 %66 = fmul float %53, 0xBFEA0418A0000000 %67 = fadd float %66, %63 %68 = fmul float %53, 0.000000e+00 %69 = fadd float %68, %63 %70 = bitcast float %43 to i32 %71 = bitcast float %44 to i32 %72 = insertelement <2 x i32> undef, i32 %70, i32 0 %73 = insertelement <2 x i32> %72, i32 %71, i32 1 %74 = bitcast <8 x i32> %27 to <32 x i8> %75 = bitcast <4 x i32> %29 to <16 x i8> %76 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %73, <32 x i8> %74, <16 x i8> %75, i32 2) %77 = extractelement <4 x float> %76, i32 3 %78 = fadd float %77, 0xBFE0101020000000 %79 = fmul float %78, 0.000000e+00 %80 = fadd float %79, %65 %81 = fmul float %78, 0xBFD9168720000000 %82 = fadd float %81, %67 %83 = fmul float %78, 0x400022D0E0000000 %84 = fadd float %83, %69 %85 = fmul float %80, %39 %86 = fadd float %85, %35 %87 = fmul float %82, %40 %88 = fadd float %87, %36 %89 = fmul float %84, %41 %90 = fadd float %89, %37 %91 = fmul float 1.000000e+00, %42 %92 = fadd float %91, %38 %93 = fmul float %92, %34 %94 = call i32 @llvm.SI.packf16(float %86, float %88) %95 = bitcast i32 %94 to float %96 = call i32 @llvm.SI.packf16(float %90, float %93) %97 = bitcast i32 %96 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %95, float %97, float %95, float %97) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[32:39], s[6:7], 0x10 ; C0D00710 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v4, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[12:15] ; F0800800 00680402 v_mov_b32_e32 v5, 0xbf008081 ; 7E0A02FF BF008081 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v5, v4 ; 06080905 image_sample v6, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[0:3] ; F0800800 00040602 v_mov_b32_e32 v7, 0xbd808081 ; 7E0E02FF BD808081 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v6, v7 ; 060C0F06 v_mul_f32_e32 v6, 0x3f94fdf4, v6 ; 100C0CFF 3F94FDF4 v_mov_b32_e32 v7, 0xbf5020c5 ; 7E0E02FF BF5020C5 v_mad_f32 v7, v7, v4, v6 ; D2820007 041A0907 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[8:11] ; F0800800 00460202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v2, v5, v2 ; 06040505 v_mov_b32_e32 v3, 0xbec8b439 ; 7E0602FF BEC8B439 v_mad_f32 v3, v3, v2, v7 ; D2820003 041E0503 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_mad_f32 v3, v3, v7, v5 ; D2820003 04160F03 v_mov_b32_e32 v5, 0x3fcc49ba ; 7E0A02FF 3FCC49BA v_mad_f32 v5, v5, v4, v6 ; D2820005 041A0905 v_mad_f32 v5, 0, v2, v5 ; D2820005 04160480 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_mad_f32 v5, v5, v8, v7 ; D2820005 041E1105 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_mad_f32 v4, 0, v4, v6 ; D2820004 041A0880 v_mov_b32_e32 v5, 0x40011687 ; 7E0A02FF 40011687 v_mad_f32 v2, v5, v2, v4 ; D2820002 04120505 v_interp_p1_f32 v4, v0, 2, 1, [m0] ; C8100600 v_interp_p2_f32 v4, [v4], v1, 2, 1, [m0] ; C8110601 v_interp_p1_f32 v5, v0, 2, 2, [m0] ; C8140A00 v_interp_p2_f32 v5, [v5], v1, 2, 2, [m0] ; C8150A01 v_mad_f32 v2, v2, v5, v4 ; D2820002 04120B02 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 3, 2, [m0] ; C8140B00 v_interp_p2_f32 v5, [v5], v1, 3, 2, [m0] ; C8150B01 v_add_f32_e32 v4, v4, v5 ; 06080B04 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_mul_f32_e32 v0, v5, v4 ; 10000905 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: DP4 TEMP[0].x, IN[2], CONST[0] 1: DP4 TEMP[1].x, IN[2], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[2], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[2], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MOV OUT[2], IN[1] 8: MOV OUT[1], IN[0] 9: MOV OUT[0], TEMP[0] 10: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %13 %54 = fmul float %50, %14 %55 = fadd float %53, %54 %56 = fmul float %51, %15 %57 = fadd float %55, %56 %58 = fmul float %52, %16 %59 = fadd float %57, %58 %60 = fmul float %49, %17 %61 = fmul float %50, %18 %62 = fadd float %60, %61 %63 = fmul float %51, %19 %64 = fadd float %62, %63 %65 = fmul float %52, %20 %66 = fadd float %64, %65 %67 = fmul float %49, %21 %68 = fmul float %50, %22 %69 = fadd float %67, %68 %70 = fmul float %51, %23 %71 = fadd float %69, %70 %72 = fmul float %52, %24 %73 = fadd float %71, %72 %74 = fmul float %49, %25 %75 = fmul float %50, %26 %76 = fadd float %74, %75 %77 = fmul float %51, %27 %78 = fadd float %76, %77 %79 = fmul float %52, %28 %80 = fadd float %78, %79 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float %73, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v3, v0, 3, 1, [m0] ; C80C0700 v_interp_p2_f32 v3, [v3], v1, 3, 1, [m0] ; C80D0701 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_cvt_pkrtz_f16_f32_e32 v0, v4, v3 ; 5E000704 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[0] 1: DP4 TEMP[1].x, IN[1], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[4] 8: DP4 TEMP[2].x, IN[1], CONST[5] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[0], TEMP[0] 13: MOV OUT[2], TEMP[1] 14: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %13 %54 = fmul float %50, %14 %55 = fadd float %53, %54 %56 = fmul float %51, %15 %57 = fadd float %55, %56 %58 = fmul float %52, %16 %59 = fadd float %57, %58 %60 = fmul float %49, %17 %61 = fmul float %50, %18 %62 = fadd float %60, %61 %63 = fmul float %51, %19 %64 = fadd float %62, %63 %65 = fmul float %52, %20 %66 = fadd float %64, %65 %67 = fmul float %49, %21 %68 = fmul float %50, %22 %69 = fadd float %67, %68 %70 = fmul float %51, %23 %71 = fadd float %69, %70 %72 = fmul float %52, %24 %73 = fadd float %71, %72 %74 = fmul float %49, %25 %75 = fmul float %50, %26 %76 = fadd float %74, %75 %77 = fmul float %51, %27 %78 = fadd float %76, %77 %79 = fmul float %52, %28 %80 = fadd float %78, %79 %81 = fmul float %49, %29 %82 = fmul float %50, %30 %83 = fadd float %81, %82 %84 = fmul float %51, %31 %85 = fadd float %83, %84 %86 = fmul float %52, %32 %87 = fadd float %85, %86 %88 = fmul float %49, %33 %89 = fmul float %50, %34 %90 = fadd float %88, %89 %91 = fmul float %51, %35 %92 = fadd float %90, %91 %93 = fmul float %52, %36 %94 = fadd float %92, %93 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %87, float %94, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float %73, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 33, 0, 0, 0, v5, v4, v6, v6 ; F800021F 06060405 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s4, v6 ; D2820006 04180902 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { -0.5020, 1.5960, -0.8130, 0.0000} IMM[1] FLT32 { -0.0627, 1.1640, 1.0000, -0.3920} IMM[2] FLT32 { 0.0000, -0.3920, 2.0170, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[2], 2D 2: ADD TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 5: ADD TEMP[1].x, TEMP[1].wwww, IMM[1].xxxx 6: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 7: MAD TEMP[0], TEMP[0].xxxx, IMM[0].yzww, TEMP[1].xxxx 8: MOV TEMP[1].xy, IN[1].xyyy 9: TEX TEMP[1].w, TEMP[1], SAMP[1], 2D 10: ADD TEMP[1].x, TEMP[1].wwww, IMM[0].xxxx 11: MAD TEMP[0].xyz, TEMP[1].xxxx, IMM[2].xyzx, TEMP[0] 12: MOV TEMP[0].xyz, TEMP[0].xyzx 13: MOV TEMP[0].w, IN[0].wwww 14: MOV OUT[0], TEMP[0] 15: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = bitcast <8 x i32> %31 to <32 x i8> %42 = bitcast <4 x i32> %33 to <16 x i8> %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %41, <16 x i8> %42, i32 2) %44 = extractelement <4 x float> %43, i32 3 %45 = fadd float %44, 0xBFE0101020000000 %46 = bitcast float %35 to i32 %47 = bitcast float %36 to i32 %48 = insertelement <2 x i32> undef, i32 %46, i32 0 %49 = insertelement <2 x i32> %48, i32 %47, i32 1 %50 = bitcast <8 x i32> %23 to <32 x i8> %51 = bitcast <4 x i32> %25 to <16 x i8> %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %50, <16 x i8> %51, i32 2) %53 = extractelement <4 x float> %52, i32 3 %54 = fadd float %53, 0xBFB0101020000000 %55 = fmul float %54, 0x3FF29FBE80000000 %56 = fmul float %45, 0x3FF9893740000000 %57 = fadd float %56, %55 %58 = fmul float %45, 0xBFEA0418A0000000 %59 = fadd float %58, %55 %60 = fmul float %45, 0.000000e+00 %61 = fadd float %60, %55 %62 = bitcast float %35 to i32 %63 = bitcast float %36 to i32 %64 = insertelement <2 x i32> undef, i32 %62, i32 0 %65 = insertelement <2 x i32> %64, i32 %63, i32 1 %66 = bitcast <8 x i32> %27 to <32 x i8> %67 = bitcast <4 x i32> %29 to <16 x i8> %68 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %66, <16 x i8> %67, i32 2) %69 = extractelement <4 x float> %68, i32 3 %70 = fadd float %69, 0xBFE0101020000000 %71 = fmul float %70, 0.000000e+00 %72 = fadd float %71, %57 %73 = fmul float %70, 0xBFD9168720000000 %74 = fadd float %73, %59 %75 = fmul float %70, 0x400022D0E0000000 %76 = fadd float %75, %61 %77 = call i32 @llvm.SI.packf16(float %72, float %74) %78 = bitcast i32 %77 to float %79 = call i32 @llvm.SI.packf16(float %76, float %34) %80 = bitcast i32 %79 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %78, float %80, float %78, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[32:39], s[6:7], 0x10 ; C0D00710 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v4, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[12:15] ; F0800800 00680402 v_mov_b32_e32 v5, 0xbf008081 ; 7E0A02FF BF008081 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v5, v4 ; 06080905 image_sample v6, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[0:3] ; F0800800 00040602 v_mov_b32_e32 v7, 0xbd808081 ; 7E0E02FF BD808081 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v6, v7 ; 060C0F06 v_mul_f32_e32 v6, 0x3f94fdf4, v6 ; 100C0CFF 3F94FDF4 v_mov_b32_e32 v7, 0xbf5020c5 ; 7E0E02FF BF5020C5 v_mad_f32 v7, v7, v4, v6 ; D2820007 041A0907 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[8:11] ; F0800800 00460202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v2, v5, v2 ; 06040505 v_mov_b32_e32 v3, 0xbec8b439 ; 7E0602FF BEC8B439 v_mad_f32 v3, v3, v2, v7 ; D2820003 041E0503 v_mov_b32_e32 v5, 0x3fcc49ba ; 7E0A02FF 3FCC49BA v_mad_f32 v5, v5, v4, v6 ; D2820005 041A0905 v_mad_f32 v5, 0, v2, v5 ; D2820005 04160480 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_mad_f32 v4, 0, v4, v6 ; D2820004 041A0880 v_mov_b32_e32 v5, 0x40011687 ; 7E0A02FF 40011687 v_mad_f32 v2, v5, v2, v4 ; D2820002 04120505 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_cvt_pkrtz_f16_f32_e32 v0, v2, v4 ; 5E000902 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[2] 2: DP4 TEMP[1].x, IN[1], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[4] 5: DP4 TEMP[2].x, IN[1], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[2], CONST[0] 10: MOV OUT[3], CONST[1] 11: MOV OUT[0], TEMP[0] 12: MOV OUT[4], TEMP[1] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %21 %54 = fmul float %50, %22 %55 = fadd float %53, %54 %56 = fmul float %51, %23 %57 = fadd float %55, %56 %58 = fmul float %52, %24 %59 = fadd float %57, %58 %60 = fmul float %49, %25 %61 = fmul float %50, %26 %62 = fadd float %60, %61 %63 = fmul float %51, %27 %64 = fadd float %62, %63 %65 = fmul float %52, %28 %66 = fadd float %64, %65 %67 = fmul float %49, %29 %68 = fmul float %50, %30 %69 = fadd float %67, %68 %70 = fmul float %51, %31 %71 = fadd float %69, %70 %72 = fmul float %52, %32 %73 = fadd float %71, %72 %74 = fmul float %49, %33 %75 = fmul float %50, %34 %76 = fadd float %74, %75 %77 = fmul float %51, %35 %78 = fadd float %76, %77 %79 = fmul float %52, %36 %80 = fadd float %78, %79 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %73, float %80, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s7 ; 7E020207 v_mov_b32_e32 v2, s6 ; 7E040206 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 35, 0, 0, 0, v5, v4, v6, v6 ; F800023F 06060405 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s0, v5 ; D2820000 04140103 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v0, v4, v6, v1 ; F80008CF 01060400 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { -0.5020, 1.5960, -0.8130, 0.0000} IMM[1] FLT32 { -0.0627, 1.1640, 1.0000, -0.3920} IMM[2] FLT32 { 0.0000, -0.3920, 2.0170, 0.0000} 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[2], 2D 2: ADD TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[3].xyyy 4: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 5: ADD TEMP[1].x, TEMP[1].wwww, IMM[1].xxxx 6: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 7: MAD TEMP[0], TEMP[0].xxxx, IMM[0].yzww, TEMP[1].xxxx 8: MOV TEMP[1].xy, IN[3].xyyy 9: TEX TEMP[1].w, TEMP[1], SAMP[1], 2D 10: ADD TEMP[1].x, TEMP[1].wwww, IMM[0].xxxx 11: MAD TEMP[1].xyz, TEMP[1].xxxx, IMM[2].xyzx, TEMP[0] 12: MOV TEMP[0].xyz, TEMP[1].xyzx 13: MOV TEMP[0].w, IMM[1].zzzz 14: MAD TEMP[0], TEMP[0], IN[2], IN[1] 15: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 16: MOV TEMP[0].w, TEMP[1].xxxx 17: MOV OUT[0], TEMP[0] 18: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %45 = bitcast float %43 to i32 %46 = bitcast float %44 to i32 %47 = insertelement <2 x i32> undef, i32 %45, i32 0 %48 = insertelement <2 x i32> %47, i32 %46, i32 1 %49 = bitcast <8 x i32> %31 to <32 x i8> %50 = bitcast <4 x i32> %33 to <16 x i8> %51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %48, <32 x i8> %49, <16 x i8> %50, i32 2) %52 = extractelement <4 x float> %51, i32 3 %53 = fadd float %52, 0xBFE0101020000000 %54 = bitcast float %43 to i32 %55 = bitcast float %44 to i32 %56 = insertelement <2 x i32> undef, i32 %54, i32 0 %57 = insertelement <2 x i32> %56, i32 %55, i32 1 %58 = bitcast <8 x i32> %23 to <32 x i8> %59 = bitcast <4 x i32> %25 to <16 x i8> %60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %57, <32 x i8> %58, <16 x i8> %59, i32 2) %61 = extractelement <4 x float> %60, i32 3 %62 = fadd float %61, 0xBFB0101020000000 %63 = fmul float %62, 0x3FF29FBE80000000 %64 = fmul float %53, 0x3FF9893740000000 %65 = fadd float %64, %63 %66 = fmul float %53, 0xBFEA0418A0000000 %67 = fadd float %66, %63 %68 = fmul float %53, 0.000000e+00 %69 = fadd float %68, %63 %70 = bitcast float %43 to i32 %71 = bitcast float %44 to i32 %72 = insertelement <2 x i32> undef, i32 %70, i32 0 %73 = insertelement <2 x i32> %72, i32 %71, i32 1 %74 = bitcast <8 x i32> %27 to <32 x i8> %75 = bitcast <4 x i32> %29 to <16 x i8> %76 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %73, <32 x i8> %74, <16 x i8> %75, i32 2) %77 = extractelement <4 x float> %76, i32 3 %78 = fadd float %77, 0xBFE0101020000000 %79 = fmul float %78, 0.000000e+00 %80 = fadd float %79, %65 %81 = fmul float %78, 0xBFD9168720000000 %82 = fadd float %81, %67 %83 = fmul float %78, 0x400022D0E0000000 %84 = fadd float %83, %69 %85 = fmul float %80, %39 %86 = fadd float %85, %35 %87 = fmul float %82, %40 %88 = fadd float %87, %36 %89 = fmul float %84, %41 %90 = fadd float %89, %37 %91 = fmul float 1.000000e+00, %42 %92 = fadd float %91, %38 %93 = fmul float %92, %34 %94 = call i32 @llvm.SI.packf16(float %86, float %88) %95 = bitcast i32 %94 to float %96 = call i32 @llvm.SI.packf16(float %90, float %93) %97 = bitcast i32 %96 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %95, float %97, float %95, float %97) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[32:39], s[6:7], 0x10 ; C0D00710 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v4, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[12:15] ; F0800800 00680402 v_mov_b32_e32 v5, 0xbf008081 ; 7E0A02FF BF008081 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v5, v4 ; 06080905 image_sample v6, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[0:3] ; F0800800 00040602 v_mov_b32_e32 v7, 0xbd808081 ; 7E0E02FF BD808081 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v6, v7 ; 060C0F06 v_mul_f32_e32 v6, 0x3f94fdf4, v6 ; 100C0CFF 3F94FDF4 v_mov_b32_e32 v7, 0xbf5020c5 ; 7E0E02FF BF5020C5 v_mad_f32 v7, v7, v4, v6 ; D2820007 041A0907 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[8:11] ; F0800800 00460202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v2, v5, v2 ; 06040505 v_mov_b32_e32 v3, 0xbec8b439 ; 7E0602FF BEC8B439 v_mad_f32 v3, v3, v2, v7 ; D2820003 041E0503 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_mad_f32 v3, v3, v7, v5 ; D2820003 04160F03 v_mov_b32_e32 v5, 0x3fcc49ba ; 7E0A02FF 3FCC49BA v_mad_f32 v5, v5, v4, v6 ; D2820005 041A0905 v_mad_f32 v5, 0, v2, v5 ; D2820005 04160480 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_mad_f32 v5, v5, v8, v7 ; D2820005 041E1105 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_mad_f32 v4, 0, v4, v6 ; D2820004 041A0880 v_mov_b32_e32 v5, 0x40011687 ; 7E0A02FF 40011687 v_mad_f32 v2, v5, v2, v4 ; D2820002 04120505 v_interp_p1_f32 v4, v0, 2, 1, [m0] ; C8100600 v_interp_p2_f32 v4, [v4], v1, 2, 1, [m0] ; C8110601 v_interp_p1_f32 v5, v0, 2, 2, [m0] ; C8140A00 v_interp_p2_f32 v5, [v5], v1, 2, 2, [m0] ; C8150A01 v_mad_f32 v2, v2, v5, v4 ; D2820002 04120B02 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 3, 2, [m0] ; C8140B00 v_interp_p2_f32 v5, [v5], v1, 3, 2, [m0] ; C8150B01 v_add_f32_e32 v4, v4, v5 ; 06080B04 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_mul_f32_e32 v0, v5, v4 ; 10000905 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 ##### CMaterialReference::Init got error material for models\heroes\morphling\watersprites\waveform_sprites2_oriented.vmt in tex group Other textures ##### CMaterialReference::Init got error material for models\heroes\morphling\watersprites\waveform_sprites2_oriented.vmt in tex group Other textures ##### CMaterialReference::Init got error material for models\heroes\morphling\watersprites\waveform_sprites2_oriented.vmt in tex group Other texturesVERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL CONST[0..70] DCL TEMP[0..4], LOCAL 0: MAD TEMP[0].xyz, IN[1].xyzz, CONST[66].xxxx, IN[0].xyzz 1: MOV TEMP[0].w, IN[0].wwww 2: DP4 TEMP[1].x, TEMP[0], CONST[67] 3: DP4 TEMP[2].x, TEMP[0], CONST[68] 4: MOV TEMP[1].y, TEMP[2].xxxx 5: DP4 TEMP[0].x, TEMP[0], CONST[69] 6: MOV TEMP[1].z, TEMP[0].xxxx 7: MOV TEMP[1].w, CONST[0].yyyy 8: DP4 TEMP[0].x, TEMP[1], CONST[8] 9: DP4 TEMP[2].x, TEMP[1], CONST[9] 10: MOV TEMP[0].y, TEMP[2].xxxx 11: DP4 TEMP[3].x, TEMP[1], CONST[10] 12: MOV TEMP[0].z, TEMP[3].xxxx 13: DP4 TEMP[1].x, TEMP[1], CONST[11] 14: MOV TEMP[0].w, TEMP[1].xxxx 15: MOV TEMP[4], TEMP[0] 16: MAD TEMP[3].x, TEMP[3].xxxx, CONST[0].zzzz, -TEMP[1].xxxx 17: MOV TEMP[0].z, TEMP[3].xxxx 18: MOV TEMP[0].y, -TEMP[2].xxxx 19: MAD TEMP[0].xy, CONST[70].xyyy, TEMP[1].xxxx, TEMP[0].xyyy 20: MOV OUT[0], TEMP[0] 21: MOV OUT[1], TEMP[4] 22: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1056) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1072) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1076) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1080) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1084) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1088) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1092) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1096) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1100) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1104) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1108) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1112) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1116) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1120) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1124) %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = fmul float %58, %31 %62 = fadd float %61, %50 %63 = fmul float %59, %31 %64 = fadd float %63, %51 %65 = fmul float %60, %31 %66 = fadd float %65, %52 %67 = fmul float %62, %32 %68 = fmul float %64, %33 %69 = fadd float %67, %68 %70 = fmul float %66, %34 %71 = fadd float %69, %70 %72 = fmul float %53, %35 %73 = fadd float %71, %72 %74 = fmul float %62, %36 %75 = fmul float %64, %37 %76 = fadd float %74, %75 %77 = fmul float %66, %38 %78 = fadd float %76, %77 %79 = fmul float %53, %39 %80 = fadd float %78, %79 %81 = fmul float %62, %40 %82 = fmul float %64, %41 %83 = fadd float %81, %82 %84 = fmul float %66, %42 %85 = fadd float %83, %84 %86 = fmul float %53, %43 %87 = fadd float %85, %86 %88 = fmul float %73, %15 %89 = fmul float %80, %16 %90 = fadd float %88, %89 %91 = fmul float %87, %17 %92 = fadd float %90, %91 %93 = fmul float %13, %18 %94 = fadd float %92, %93 %95 = fmul float %73, %19 %96 = fmul float %80, %20 %97 = fadd float %95, %96 %98 = fmul float %87, %21 %99 = fadd float %97, %98 %100 = fmul float %13, %22 %101 = fadd float %99, %100 %102 = fmul float %73, %23 %103 = fmul float %80, %24 %104 = fadd float %102, %103 %105 = fmul float %87, %25 %106 = fadd float %104, %105 %107 = fmul float %13, %26 %108 = fadd float %106, %107 %109 = fmul float %73, %27 %110 = fmul float %80, %28 %111 = fadd float %109, %110 %112 = fmul float %87, %29 %113 = fadd float %111, %112 %114 = fmul float %13, %30 %115 = fadd float %113, %114 %116 = fsub float -0.000000e+00, %115 %117 = fmul float %108, %14 %118 = fadd float %117, %116 %119 = fsub float -0.000000e+00, %101 %120 = fmul float %44, %115 %121 = fadd float %120, %94 %122 = fmul float %45, %115 %123 = fadd float %122, %119 %124 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 16 %125 = load <16 x i8> addrspace(2)* %124, !tbaa !0 %126 = call float @llvm.SI.load.const(<16 x i8> %125, i32 0) %127 = fmul float %126, %94 %128 = call float @llvm.SI.load.const(<16 x i8> %125, i32 4) %129 = fmul float %128, %101 %130 = fadd float %127, %129 %131 = call float @llvm.SI.load.const(<16 x i8> %125, i32 8) %132 = fmul float %131, %108 %133 = fadd float %130, %132 %134 = call float @llvm.SI.load.const(<16 x i8> %125, i32 12) %135 = fmul float %134, %115 %136 = fadd float %133, %135 %137 = call float @llvm.SI.load.const(<16 x i8> %125, i32 16) %138 = fmul float %137, %94 %139 = call float @llvm.SI.load.const(<16 x i8> %125, i32 20) %140 = fmul float %139, %101 %141 = fadd float %138, %140 %142 = call float @llvm.SI.load.const(<16 x i8> %125, i32 24) %143 = fmul float %142, %108 %144 = fadd float %141, %143 %145 = call float @llvm.SI.load.const(<16 x i8> %125, i32 28) %146 = fmul float %145, %115 %147 = fadd float %144, %146 %148 = call float @llvm.SI.load.const(<16 x i8> %125, i32 32) %149 = fmul float %148, %94 %150 = call float @llvm.SI.load.const(<16 x i8> %125, i32 36) %151 = fmul float %150, %101 %152 = fadd float %149, %151 %153 = call float @llvm.SI.load.const(<16 x i8> %125, i32 40) %154 = fmul float %153, %108 %155 = fadd float %152, %154 %156 = call float @llvm.SI.load.const(<16 x i8> %125, i32 44) %157 = fmul float %156, %115 %158 = fadd float %155, %157 %159 = call float @llvm.SI.load.const(<16 x i8> %125, i32 48) %160 = fmul float %159, %94 %161 = call float @llvm.SI.load.const(<16 x i8> %125, i32 52) %162 = fmul float %161, %101 %163 = fadd float %160, %162 %164 = call float @llvm.SI.load.const(<16 x i8> %125, i32 56) %165 = fmul float %164, %108 %166 = fadd float %163, %165 %167 = call float @llvm.SI.load.const(<16 x i8> %125, i32 60) %168 = fmul float %167, %115 %169 = fadd float %166, %168 %170 = call float @llvm.SI.load.const(<16 x i8> %125, i32 64) %171 = fmul float %170, %94 %172 = call float @llvm.SI.load.const(<16 x i8> %125, i32 68) %173 = fmul float %172, %101 %174 = fadd float %171, %173 %175 = call float @llvm.SI.load.const(<16 x i8> %125, i32 72) %176 = fmul float %175, %108 %177 = fadd float %174, %176 %178 = call float @llvm.SI.load.const(<16 x i8> %125, i32 76) %179 = fmul float %178, %115 %180 = fadd float %177, %179 %181 = call float @llvm.SI.load.const(<16 x i8> %125, i32 80) %182 = fmul float %181, %94 %183 = call float @llvm.SI.load.const(<16 x i8> %125, i32 84) %184 = fmul float %183, %101 %185 = fadd float %182, %184 %186 = call float @llvm.SI.load.const(<16 x i8> %125, i32 88) %187 = fmul float %186, %108 %188 = fadd float %185, %187 %189 = call float @llvm.SI.load.const(<16 x i8> %125, i32 92) %190 = fmul float %189, %115 %191 = fadd float %188, %190 %192 = call float @llvm.SI.load.const(<16 x i8> %125, i32 96) %193 = fmul float %192, %94 %194 = call float @llvm.SI.load.const(<16 x i8> %125, i32 100) %195 = fmul float %194, %101 %196 = fadd float %193, %195 %197 = call float @llvm.SI.load.const(<16 x i8> %125, i32 104) %198 = fmul float %197, %108 %199 = fadd float %196, %198 %200 = call float @llvm.SI.load.const(<16 x i8> %125, i32 108) %201 = fmul float %200, %115 %202 = fadd float %199, %201 %203 = call float @llvm.SI.load.const(<16 x i8> %125, i32 112) %204 = fmul float %203, %94 %205 = call float @llvm.SI.load.const(<16 x i8> %125, i32 116) %206 = fmul float %205, %101 %207 = fadd float %204, %206 %208 = call float @llvm.SI.load.const(<16 x i8> %125, i32 120) %209 = fmul float %208, %108 %210 = fadd float %207, %209 %211 = call float @llvm.SI.load.const(<16 x i8> %125, i32 124) %212 = fmul float %211, %115 %213 = fadd float %210, %212 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %121, float %123, float %118, float %115) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %136, float %147, float %158, float %169) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %180, float %191, float %202, float %213) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_movk_i32 s0, 0x420 ; B0000420 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[4:7], s0 ; C2000400 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v5, s0, v1 ; D2820000 04040105 v_mad_f32 v9, v6, s0, v2 ; D2820009 04080106 s_movk_i32 s1, 0x434 ; B0010434 s_buffer_load_dword s1, s[4:7], s1 ; C2008401 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s1, v9 ; 10141201 s_movk_i32 s1, 0x430 ; B0010430 s_buffer_load_dword s1, s[4:7], s1 ; C2008401 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v0, s1, v10 ; D282000A 04280300 v_mad_f32 v5, v7, s0, v3 ; D2820005 040C0107 s_movk_i32 s0, 0x438 ; B0000438 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s0, v10 ; D2820006 04280105 s_movk_i32 s0, 0x43c ; B000043C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s0, v6 ; D2820006 04180104 s_movk_i32 s0, 0x444 ; B0000444 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v9 ; 100E1200 s_movk_i32 s0, 0x440 ; B0000440 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s0, v7 ; D2820007 041C0100 s_movk_i32 s0, 0x448 ; B0000448 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v5, s0, v7 ; D2820007 041C0105 s_movk_i32 s0, 0x44c ; B000044C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v4, s0, v7 ; D2820007 041C0104 s_buffer_load_dword s0, s[4:7], 0x2d ; C200052D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s0, v7 ; 10100E00 s_buffer_load_dword s0, s[4:7], 0x2c ; C200052C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v6, s0, v8 ; D2820008 04200106 s_movk_i32 s0, 0x454 ; B0000454 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s0, v9 ; 10121200 s_movk_i32 s0, 0x450 ; B0000450 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v9 ; D2820000 04240100 s_movk_i32 s0, 0x458 ; B0000458 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s0, v0 ; D2820000 04000105 s_movk_i32 s0, 0x45c ; B000045C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s0, v0 ; D2820000 04000104 s_buffer_load_dword s0, s[4:7], 0x2e ; C200052E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s0, v8 ; D2820001 04200100 s_buffer_load_dword s0, s[4:7], 0x2f ; C200052F s_buffer_load_dword s1, s[4:7], 0x1 ; C2008501 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s0 ; 7E040200 v_mad_f32 v1, v2, s1, v1 ; D2820001 04040302 s_buffer_load_dword s0, s[4:7], 0x29 ; C2000529 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s0, v7 ; 10040E00 s_buffer_load_dword s0, s[4:7], 0x28 ; C2000528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v6, s0, v2 ; D2820002 04080106 s_buffer_load_dword s0, s[4:7], 0x2a ; C200052A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s0, v2 ; D2820002 04080100 s_buffer_load_dword s0, s[4:7], 0x2b ; C200052B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s0 ; 7E060200 v_mad_f32 v2, v3, s1, v2 ; D2820002 04080303 s_buffer_load_dword s0, s[4:7], 0x2 ; C2000502 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v2, s0, -v1 ; D2820003 84040102 s_buffer_load_dword s0, s[4:7], 0x25 ; C2000525 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s0, v7 ; 10080E00 s_buffer_load_dword s0, s[4:7], 0x24 ; C2000524 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s0, v4 ; D2820004 04100106 s_buffer_load_dword s0, s[4:7], 0x26 ; C2000526 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s0, v4 ; D2820004 04100100 s_buffer_load_dword s0, s[4:7], 0x27 ; C2000527 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s0 ; 7E0A0200 v_mad_f32 v4, v5, s1, v4 ; D2820004 04100305 s_movk_i32 s0, 0x464 ; B0000464 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s0, v1, -v4 ; D2820005 84120200 s_buffer_load_dword s0, s[4:7], 0x21 ; C2000521 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v7 ; 100E0E00 s_buffer_load_dword s0, s[4:7], 0x20 ; C2000520 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v6, s0, v7 ; D2820006 041C0106 s_buffer_load_dword s0, s[4:7], 0x22 ; C2000522 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v6 ; D2820000 04180100 s_buffer_load_dword s0, s[4:7], 0x23 ; C2000523 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mad_f32 v0, v6, s1, v0 ; D2820000 04000306 s_movk_i32 s0, 0x460 ; B0000460 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s0, v1, v0 ; D2820006 04020200 exp 15, 12, 0, 0, 0, v6, v5, v3, v1 ; F80000CF 01030506 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, v3 ; D2820003 040E0404 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v1, v3 ; D2820003 040E0204 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v4 ; 100C0804 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v0, v6 ; D2820006 041A0004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v4 ; 100E0804 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v0, v7 ; D2820007 041E0004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v2, v7 ; D2820007 041E0404 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v1, v7 ; D2820007 041E0204 exp 15, 13, 0, 0, 0, v7, v6, v5, v3 ; F80000DF 03050607 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, v3 ; D2820003 040E0404 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v1, v3 ; D2820003 040E0204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v4 ; 100C0804 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v0, v6 ; D2820006 041A0004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v0, v4 ; D2820000 04120004 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v2, v0 ; D2820000 04020404 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v1, v0 ; D2820000 04020200 exp 15, 14, 0, 1, 0, v0, v6, v5, v3 ; F80008EF 03050600 s_endpgm ; BF810000 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xyyx 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 0, 1.0 ; D25E0000 0001E480 v_cvt_pkrtz_f16_f32_e64 v1, 1.0, 0 ; D25E0001 000100F2 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[9] DCL CONST[0..237] DCL TEMP[0..7], LOCAL DCL ADDR[0] IMM[0] FLT32 { 765.0059, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} 0: ADD TEMP[0].xy, IN[2].xyyy, CONST[0].yyyy 1: MUL TEMP[1].xyz, IN[3].zyxx, IMM[0].xxxx 2: MUL TEMP[2].xy, TEMP[0].xyyy, IMM[0].yyyy 3: MOV TEMP[3].xyz, TEMP[1].xyzx 4: ADD TEMP[4].x, TEMP[2].yyyy, TEMP[2].xxxx 5: F2I TEMP[5].x, TEMP[1].yyyy 6: UARL ADDR[0].x, TEMP[5].xxxx 7: UARL ADDR[0].x, TEMP[5].xxxx 8: MUL TEMP[1], TEMP[2].yyyy, CONST[ADDR[0].x+67] 9: ADD TEMP[4].x, -TEMP[4].xxxx, CONST[0].yyyy 10: F2I TEMP[5].x, TEMP[3].xxxx 11: UARL ADDR[0].x, TEMP[5].xxxx 12: UARL ADDR[0].x, TEMP[5].xxxx 13: MAD TEMP[1], CONST[ADDR[0].x+67], TEMP[2].xxxx, TEMP[1] 14: F2I TEMP[5].x, TEMP[3].zzzz 15: UARL ADDR[0].x, TEMP[5].xxxx 16: UARL ADDR[0].x, TEMP[5].xxxx 17: MAD TEMP[1], CONST[ADDR[0].x+67], TEMP[4].xxxx, TEMP[1] 18: MAD TEMP[0].xyz, IN[4].xyzz, CONST[66].xxxx, IN[0].xyzz 19: MOV TEMP[0].w, IN[0].wwww 20: DP4 TEMP[1].x, TEMP[0], TEMP[1] 21: F2I TEMP[5].x, TEMP[3].yyyy 22: UADD TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 23: UARL ADDR[0].x, TEMP[5].xxxx 24: UARL ADDR[0].x, TEMP[5].xxxx 25: MUL TEMP[5], TEMP[2].yyyy, CONST[ADDR[0].x+67] 26: F2I TEMP[6].x, TEMP[3].xxxx 27: UADD TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx 28: UARL ADDR[0].x, TEMP[6].xxxx 29: UARL ADDR[0].x, TEMP[6].xxxx 30: MAD TEMP[6], CONST[ADDR[0].x+67], TEMP[2].xxxx, TEMP[5] 31: F2I TEMP[7].x, TEMP[3].yyyy 32: UADD TEMP[7].x, TEMP[7].xxxx, IMM[1].yyyy 33: UARL ADDR[0].x, TEMP[7].xxxx 34: UARL ADDR[0].x, TEMP[7].xxxx 35: MUL TEMP[5], TEMP[2].yyyy, CONST[ADDR[0].x+67] 36: F2I TEMP[7].x, TEMP[3].zzzz 37: UADD TEMP[7].x, TEMP[7].xxxx, IMM[1].xxxx 38: UARL ADDR[0].x, TEMP[7].xxxx 39: UARL ADDR[0].x, TEMP[7].xxxx 40: MAD TEMP[6], CONST[ADDR[0].x+67], TEMP[4].xxxx, TEMP[6] 41: F2I TEMP[7].x, TEMP[3].xxxx 42: UADD TEMP[7].x, TEMP[7].xxxx, IMM[1].yyyy 43: UARL ADDR[0].x, TEMP[7].xxxx 44: UARL ADDR[0].x, TEMP[7].xxxx 45: MAD TEMP[5], CONST[ADDR[0].x+67], TEMP[2].xxxx, TEMP[5] 46: DP4 TEMP[2].x, TEMP[0], TEMP[6] 47: MOV TEMP[1].y, TEMP[2].xxxx 48: F2I TEMP[2].x, TEMP[3].zzzz 49: UADD TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy 50: UARL ADDR[0].x, TEMP[2].xxxx 51: UARL ADDR[0].x, TEMP[2].xxxx 52: MAD TEMP[5], CONST[ADDR[0].x+67], TEMP[4].xxxx, TEMP[5] 53: DP4 TEMP[0].x, TEMP[0], TEMP[5] 54: MOV TEMP[1].z, TEMP[0].xxxx 55: MOV TEMP[1].w, CONST[0].yyyy 56: DP4 TEMP[0].x, TEMP[1], CONST[8] 57: DP4 TEMP[2].x, TEMP[1], CONST[9] 58: MOV TEMP[0].y, TEMP[2].xxxx 59: DP4 TEMP[3].x, TEMP[1], CONST[10] 60: MOV TEMP[0].z, TEMP[3].xxxx 61: DP4 TEMP[1].x, TEMP[1], CONST[11] 62: MOV TEMP[0].w, TEMP[1].xxxx 63: MOV TEMP[4].xy, IN[1].xyxx 64: MOV TEMP[5], TEMP[0] 65: MAD TEMP[3].x, TEMP[3].xxxx, CONST[0].zzzz, -TEMP[1].xxxx 66: MOV TEMP[0].z, TEMP[3].xxxx 67: MOV TEMP[0].y, -TEMP[2].xxxx 68: MAD TEMP[0].xy, CONST[237].xyyy, TEMP[1].xxxx, TEMP[0].xyyy 69: MOV OUT[2], TEMP[4] 70: MOV OUT[0], TEMP[0] 71: MOV OUT[1], TEMP[5] 72: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1056) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3792) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3796) %34 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %35 = load <16 x i8> addrspace(2)* %34, !tbaa !0 %36 = add i32 %5, %7 %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %35, i32 0, i32 %36) %38 = extractelement <4 x float> %37, i32 0 %39 = extractelement <4 x float> %37, i32 1 %40 = extractelement <4 x float> %37, i32 2 %41 = extractelement <4 x float> %37, i32 3 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %62 = load <16 x i8> addrspace(2)* %61, !tbaa !0 %63 = add i32 %5, %7 %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %62, i32 0, i32 %63) %65 = extractelement <4 x float> %64, i32 0 %66 = extractelement <4 x float> %64, i32 1 %67 = extractelement <4 x float> %64, i32 2 %68 = fadd float %52, %13 %69 = fadd float %53, %13 %70 = fmul float %60, 0x4087E80C00000000 %71 = fmul float %59, 0x4087E80C00000000 %72 = fmul float %58, 0x4087E80C00000000 %73 = fmul float %68, 0x3F00000000000000 %74 = fmul float %69, 0x3F00000000000000 %75 = fadd float %74, %73 %76 = fptosi float %71 to i32 %77 = bitcast i32 %76 to float %78 = bitcast float %77 to i32 %79 = shl i32 %78, 4 %80 = add i32 %79, 1072 %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %80) %82 = fmul float %74, %81 %83 = shl i32 %78, 4 %84 = add i32 %83, 1076 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %74, %85 %87 = shl i32 %78, 4 %88 = add i32 %87, 1080 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = fmul float %74, %89 %91 = shl i32 %78, 4 %92 = add i32 %91, 1084 %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %92) %94 = fmul float %74, %93 %95 = fsub float -0.000000e+00, %75 %96 = fadd float %95, %13 %97 = fptosi float %70 to i32 %98 = bitcast i32 %97 to float %99 = bitcast float %98 to i32 %100 = shl i32 %99, 4 %101 = add i32 %100, 1072 %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %101) %103 = fmul float %102, %73 %104 = fadd float %103, %82 %105 = shl i32 %99, 4 %106 = add i32 %105, 1076 %107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %106) %108 = fmul float %107, %73 %109 = fadd float %108, %86 %110 = shl i32 %99, 4 %111 = add i32 %110, 1080 %112 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %111) %113 = fmul float %112, %73 %114 = fadd float %113, %90 %115 = shl i32 %99, 4 %116 = add i32 %115, 1084 %117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %116) %118 = fmul float %117, %73 %119 = fadd float %118, %94 %120 = fptosi float %72 to i32 %121 = bitcast i32 %120 to float %122 = bitcast float %121 to i32 %123 = shl i32 %122, 4 %124 = add i32 %123, 1072 %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %124) %126 = fmul float %125, %96 %127 = fadd float %126, %104 %128 = shl i32 %122, 4 %129 = add i32 %128, 1076 %130 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %129) %131 = fmul float %130, %96 %132 = fadd float %131, %109 %133 = shl i32 %122, 4 %134 = add i32 %133, 1080 %135 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %134) %136 = fmul float %135, %96 %137 = fadd float %136, %114 %138 = shl i32 %122, 4 %139 = add i32 %138, 1084 %140 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %139) %141 = fmul float %140, %96 %142 = fadd float %141, %119 %143 = fmul float %65, %31 %144 = fadd float %143, %38 %145 = fmul float %66, %31 %146 = fadd float %145, %39 %147 = fmul float %67, %31 %148 = fadd float %147, %40 %149 = fmul float %144, %127 %150 = fmul float %146, %132 %151 = fadd float %149, %150 %152 = fmul float %148, %137 %153 = fadd float %151, %152 %154 = fmul float %41, %142 %155 = fadd float %153, %154 %156 = fptosi float %71 to i32 %157 = bitcast i32 %156 to float %158 = bitcast float %157 to i32 %159 = add i32 %158, 1 %160 = bitcast i32 %159 to float %161 = bitcast float %160 to i32 %162 = shl i32 %161, 4 %163 = add i32 %162, 1072 %164 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %163) %165 = fmul float %74, %164 %166 = shl i32 %161, 4 %167 = add i32 %166, 1076 %168 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %167) %169 = fmul float %74, %168 %170 = shl i32 %161, 4 %171 = add i32 %170, 1080 %172 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %171) %173 = fmul float %74, %172 %174 = shl i32 %161, 4 %175 = add i32 %174, 1084 %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %175) %177 = fmul float %74, %176 %178 = fptosi float %70 to i32 %179 = bitcast i32 %178 to float %180 = bitcast float %179 to i32 %181 = add i32 %180, 1 %182 = bitcast i32 %181 to float %183 = bitcast float %182 to i32 %184 = shl i32 %183, 4 %185 = add i32 %184, 1072 %186 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %185) %187 = fmul float %186, %73 %188 = fadd float %187, %165 %189 = shl i32 %183, 4 %190 = add i32 %189, 1076 %191 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %190) %192 = fmul float %191, %73 %193 = fadd float %192, %169 %194 = shl i32 %183, 4 %195 = add i32 %194, 1080 %196 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %195) %197 = fmul float %196, %73 %198 = fadd float %197, %173 %199 = shl i32 %183, 4 %200 = add i32 %199, 1084 %201 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %200) %202 = fmul float %201, %73 %203 = fadd float %202, %177 %204 = fptosi float %71 to i32 %205 = bitcast i32 %204 to float %206 = bitcast float %205 to i32 %207 = add i32 %206, 2 %208 = bitcast i32 %207 to float %209 = bitcast float %208 to i32 %210 = shl i32 %209, 4 %211 = add i32 %210, 1072 %212 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %211) %213 = fmul float %74, %212 %214 = shl i32 %209, 4 %215 = add i32 %214, 1076 %216 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %215) %217 = fmul float %74, %216 %218 = shl i32 %209, 4 %219 = add i32 %218, 1080 %220 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %219) %221 = fmul float %74, %220 %222 = shl i32 %209, 4 %223 = add i32 %222, 1084 %224 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %223) %225 = fmul float %74, %224 %226 = fptosi float %72 to i32 %227 = bitcast i32 %226 to float %228 = bitcast float %227 to i32 %229 = add i32 %228, 1 %230 = bitcast i32 %229 to float %231 = bitcast float %230 to i32 %232 = shl i32 %231, 4 %233 = add i32 %232, 1072 %234 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %233) %235 = fmul float %234, %96 %236 = fadd float %235, %188 %237 = shl i32 %231, 4 %238 = add i32 %237, 1076 %239 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %238) %240 = fmul float %239, %96 %241 = fadd float %240, %193 %242 = shl i32 %231, 4 %243 = add i32 %242, 1080 %244 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %243) %245 = fmul float %244, %96 %246 = fadd float %245, %198 %247 = shl i32 %231, 4 %248 = add i32 %247, 1084 %249 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %248) %250 = fmul float %249, %96 %251 = fadd float %250, %203 %252 = fptosi float %70 to i32 %253 = bitcast i32 %252 to float %254 = bitcast float %253 to i32 %255 = add i32 %254, 2 %256 = bitcast i32 %255 to float %257 = bitcast float %256 to i32 %258 = shl i32 %257, 4 %259 = add i32 %258, 1072 %260 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %259) %261 = fmul float %260, %73 %262 = fadd float %261, %213 %263 = shl i32 %257, 4 %264 = add i32 %263, 1076 %265 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %264) %266 = fmul float %265, %73 %267 = fadd float %266, %217 %268 = shl i32 %257, 4 %269 = add i32 %268, 1080 %270 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %269) %271 = fmul float %270, %73 %272 = fadd float %271, %221 %273 = shl i32 %257, 4 %274 = add i32 %273, 1084 %275 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %274) %276 = fmul float %275, %73 %277 = fadd float %276, %225 %278 = fmul float %144, %236 %279 = fmul float %146, %241 %280 = fadd float %278, %279 %281 = fmul float %148, %246 %282 = fadd float %280, %281 %283 = fmul float %41, %251 %284 = fadd float %282, %283 %285 = fptosi float %72 to i32 %286 = bitcast i32 %285 to float %287 = bitcast float %286 to i32 %288 = add i32 %287, 2 %289 = bitcast i32 %288 to float %290 = bitcast float %289 to i32 %291 = shl i32 %290, 4 %292 = add i32 %291, 1072 %293 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %292) %294 = fmul float %293, %96 %295 = fadd float %294, %262 %296 = shl i32 %290, 4 %297 = add i32 %296, 1076 %298 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %297) %299 = fmul float %298, %96 %300 = fadd float %299, %267 %301 = shl i32 %290, 4 %302 = add i32 %301, 1080 %303 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %302) %304 = fmul float %303, %96 %305 = fadd float %304, %272 %306 = shl i32 %290, 4 %307 = add i32 %306, 1084 %308 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %307) %309 = fmul float %308, %96 %310 = fadd float %309, %277 %311 = fmul float %144, %295 %312 = fmul float %146, %300 %313 = fadd float %311, %312 %314 = fmul float %148, %305 %315 = fadd float %313, %314 %316 = fmul float %41, %310 %317 = fadd float %315, %316 %318 = fmul float %155, %15 %319 = fmul float %284, %16 %320 = fadd float %318, %319 %321 = fmul float %317, %17 %322 = fadd float %320, %321 %323 = fmul float %13, %18 %324 = fadd float %322, %323 %325 = fmul float %155, %19 %326 = fmul float %284, %20 %327 = fadd float %325, %326 %328 = fmul float %317, %21 %329 = fadd float %327, %328 %330 = fmul float %13, %22 %331 = fadd float %329, %330 %332 = fmul float %155, %23 %333 = fmul float %284, %24 %334 = fadd float %332, %333 %335 = fmul float %317, %25 %336 = fadd float %334, %335 %337 = fmul float %13, %26 %338 = fadd float %336, %337 %339 = fmul float %155, %27 %340 = fmul float %284, %28 %341 = fadd float %339, %340 %342 = fmul float %317, %29 %343 = fadd float %341, %342 %344 = fmul float %13, %30 %345 = fadd float %343, %344 %346 = fsub float -0.000000e+00, %345 %347 = fmul float %338, %14 %348 = fadd float %347, %346 %349 = fsub float -0.000000e+00, %331 %350 = fmul float %32, %345 %351 = fadd float %350, %324 %352 = fmul float %33, %345 %353 = fadd float %352, %349 %354 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 16 %355 = load <16 x i8> addrspace(2)* %354, !tbaa !0 %356 = call float @llvm.SI.load.const(<16 x i8> %355, i32 0) %357 = fmul float %356, %324 %358 = call float @llvm.SI.load.const(<16 x i8> %355, i32 4) %359 = fmul float %358, %331 %360 = fadd float %357, %359 %361 = call float @llvm.SI.load.const(<16 x i8> %355, i32 8) %362 = fmul float %361, %338 %363 = fadd float %360, %362 %364 = call float @llvm.SI.load.const(<16 x i8> %355, i32 12) %365 = fmul float %364, %345 %366 = fadd float %363, %365 %367 = call float @llvm.SI.load.const(<16 x i8> %355, i32 16) %368 = fmul float %367, %324 %369 = call float @llvm.SI.load.const(<16 x i8> %355, i32 20) %370 = fmul float %369, %331 %371 = fadd float %368, %370 %372 = call float @llvm.SI.load.const(<16 x i8> %355, i32 24) %373 = fmul float %372, %338 %374 = fadd float %371, %373 %375 = call float @llvm.SI.load.const(<16 x i8> %355, i32 28) %376 = fmul float %375, %345 %377 = fadd float %374, %376 %378 = call float @llvm.SI.load.const(<16 x i8> %355, i32 32) %379 = fmul float %378, %324 %380 = call float @llvm.SI.load.const(<16 x i8> %355, i32 36) %381 = fmul float %380, %331 %382 = fadd float %379, %381 %383 = call float @llvm.SI.load.const(<16 x i8> %355, i32 40) %384 = fmul float %383, %338 %385 = fadd float %382, %384 %386 = call float @llvm.SI.load.const(<16 x i8> %355, i32 44) %387 = fmul float %386, %345 %388 = fadd float %385, %387 %389 = call float @llvm.SI.load.const(<16 x i8> %355, i32 48) %390 = fmul float %389, %324 %391 = call float @llvm.SI.load.const(<16 x i8> %355, i32 52) %392 = fmul float %391, %331 %393 = fadd float %390, %392 %394 = call float @llvm.SI.load.const(<16 x i8> %355, i32 56) %395 = fmul float %394, %338 %396 = fadd float %393, %395 %397 = call float @llvm.SI.load.const(<16 x i8> %355, i32 60) %398 = fmul float %397, %345 %399 = fadd float %396, %398 %400 = call float @llvm.SI.load.const(<16 x i8> %355, i32 64) %401 = fmul float %400, %324 %402 = call float @llvm.SI.load.const(<16 x i8> %355, i32 68) %403 = fmul float %402, %331 %404 = fadd float %401, %403 %405 = call float @llvm.SI.load.const(<16 x i8> %355, i32 72) %406 = fmul float %405, %338 %407 = fadd float %404, %406 %408 = call float @llvm.SI.load.const(<16 x i8> %355, i32 76) %409 = fmul float %408, %345 %410 = fadd float %407, %409 %411 = call float @llvm.SI.load.const(<16 x i8> %355, i32 80) %412 = fmul float %411, %324 %413 = call float @llvm.SI.load.const(<16 x i8> %355, i32 84) %414 = fmul float %413, %331 %415 = fadd float %412, %414 %416 = call float @llvm.SI.load.const(<16 x i8> %355, i32 88) %417 = fmul float %416, %338 %418 = fadd float %415, %417 %419 = call float @llvm.SI.load.const(<16 x i8> %355, i32 92) %420 = fmul float %419, %345 %421 = fadd float %418, %420 %422 = call float @llvm.SI.load.const(<16 x i8> %355, i32 96) %423 = fmul float %422, %324 %424 = call float @llvm.SI.load.const(<16 x i8> %355, i32 100) %425 = fmul float %424, %331 %426 = fadd float %423, %425 %427 = call float @llvm.SI.load.const(<16 x i8> %355, i32 104) %428 = fmul float %427, %338 %429 = fadd float %426, %428 %430 = call float @llvm.SI.load.const(<16 x i8> %355, i32 108) %431 = fmul float %430, %345 %432 = fadd float %429, %431 %433 = call float @llvm.SI.load.const(<16 x i8> %355, i32 112) %434 = fmul float %433, %324 %435 = call float @llvm.SI.load.const(<16 x i8> %355, i32 116) %436 = fmul float %435, %331 %437 = fadd float %434, %436 %438 = call float @llvm.SI.load.const(<16 x i8> %355, i32 120) %439 = fmul float %438, %338 %440 = fadd float %437, %439 %441 = call float @llvm.SI.load.const(<16 x i8> %355, i32 124) %442 = fmul float %441, %345 %443 = fadd float %440, %442 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %351, float %353, float %348, float %345) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %366, float %377, float %388, float %399) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %410, float %421, float %432, float %443) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v5, v5 ; F800020F 05050201 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_movk_i32 s0, 0x420 ; B0000420 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s0, s[4:7], s0 ; C2000400 buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v9, v6, s0, v2 ; D2820009 04080106 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_buffer_load_dword s1, s[4:7], 0x1 ; C2008501 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v14, s1, v11 ; 061C1601 v_mov_b32_e32 v15, 0x38000000 ; 7E1E02FF 38000000 v_mul_f32_e32 v16, 0x38000000, v14 ; 10201CFF 38000000 buffer_load_format_xyzw v[17:20], v0, s[20:23], 0 idxen ; E00C2000 80051100 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x443f4060, v18 ; 100024FF 443F4060 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 s_movk_i32 s8, 0x434 ; B0080434 v_add_i32_e32 v21, s8, v0 ; 4A2A0008 buffer_load_dword v21, v21, s[4:7], 0 offen ; E0301000 80011515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v16 ; 102A2115 v_add_f32_e32 v10, s1, v10 ; 06141401 v_mul_f32_e32 v10, 0x38000000, v10 ; 101414FF 38000000 v_mul_f32_e32 v11, 0x443f4060, v19 ; 101626FF 443F4060 v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_lshlrev_b32_e32 v11, 4, v11 ; 34161684 v_add_i32_e32 v12, s8, v11 ; 4A181608 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v12, v10, v21 ; D282000C 0456150C v_mad_f32 v13, v14, v15, v10 ; D282000D 042A1F0E v_sub_f32_e32 v13, s1, v13 ; 081A1A01 v_mul_f32_e32 v14, 0x443f4060, v17 ; 101C22FF 443F4060 v_cvt_i32_f32_e32 v14, v14 ; 7E1C110E v_lshlrev_b32_e32 v14, 4, v14 ; 341C1C84 v_add_i32_e32 v15, s8, v14 ; 4A1E1C08 buffer_load_dword v15, v15, s[4:7], 0 offen ; E0301000 80010F0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v15, v13, v12 ; D282000C 04321B0F v_mul_f32_e32 v12, v12, v9 ; 1018130C v_mad_f32 v15, v5, s0, v1 ; D282000F 04040105 s_movk_i32 s8, 0x430 ; B0080430 v_add_i32_e32 v17, s8, v0 ; 4A220008 buffer_load_dword v17, v17, s[4:7], 0 offen ; E0301000 80011111 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v17, v16 ; 10222111 v_add_i32_e32 v18, s8, v11 ; 4A241608 buffer_load_dword v18, v18, s[4:7], 0 offen ; E0301000 80011212 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v17, v18, v10, v17 ; D2820011 04461512 v_add_i32_e32 v18, s8, v14 ; 4A241C08 buffer_load_dword v18, v18, s[4:7], 0 offen ; E0301000 80011212 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v17, v18, v13, v17 ; D2820011 04461B12 v_mad_f32 v12, v15, v17, v12 ; D282000C 0432230F v_mad_f32 v5, v7, s0, v3 ; D2820005 040C0107 s_movk_i32 s0, 0x438 ; B0000438 v_add_i32_e32 v6, s0, v0 ; 4A0C0000 buffer_load_dword v6, v6, s[4:7], 0 offen ; E0301000 80010606 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v6, v6, v16 ; 100C2106 v_add_i32_e32 v7, s0, v11 ; 4A0E1600 buffer_load_dword v7, v7, s[4:7], 0 offen ; E0301000 80010707 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v7, v10, v6 ; D2820006 041A1507 v_add_i32_e32 v7, s0, v14 ; 4A0E1C00 buffer_load_dword v7, v7, s[4:7], 0 offen ; E0301000 80010707 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v7, v13, v6 ; D2820006 041A1B07 v_mad_f32 v6, v5, v6, v12 ; D2820006 04320D05 s_movk_i32 s0, 0x43c ; B000043C v_add_i32_e32 v7, s0, v0 ; 4A0E0000 buffer_load_dword v7, v7, s[4:7], 0 offen ; E0301000 80010707 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, v7, v16 ; 100E2107 v_add_i32_e32 v8, s0, v11 ; 4A101600 buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v8, v10, v7 ; D2820007 041E1508 v_add_i32_e32 v8, s0, v14 ; 4A101C00 buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v8, v13, v7 ; D2820007 041E1B08 v_mad_f32 v6, v4, v7, v6 ; D2820006 041A0F04 s_movk_i32 s0, 0x444 ; B0000444 v_add_i32_e32 v7, s0, v0 ; 4A0E0000 buffer_load_dword v7, v7, s[4:7], 0 offen ; E0301000 80010707 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, v7, v16 ; 100E2107 v_add_i32_e32 v8, s0, v11 ; 4A101600 buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v8, v10, v7 ; D2820007 041E1508 v_add_i32_e32 v8, s0, v14 ; 4A101C00 buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v8, v13, v7 ; D2820007 041E1B08 v_mul_f32_e32 v7, v7, v9 ; 100E1307 s_movk_i32 s0, 0x440 ; B0000440 v_add_i32_e32 v8, s0, v0 ; 4A100000 buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v8, v16 ; 10102108 v_add_i32_e32 v12, s0, v11 ; 4A181600 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v12, v10, v8 ; D2820008 0422150C v_add_i32_e32 v12, s0, v14 ; 4A181C00 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v12, v13, v8 ; D2820008 04221B0C v_mad_f32 v7, v15, v8, v7 ; D2820007 041E110F s_movk_i32 s0, 0x448 ; B0000448 v_add_i32_e32 v8, s0, v0 ; 4A100000 buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v8, v16 ; 10102108 v_add_i32_e32 v12, s0, v11 ; 4A181600 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v12, v10, v8 ; D2820008 0422150C v_add_i32_e32 v12, s0, v14 ; 4A181C00 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v12, v13, v8 ; D2820008 04221B0C v_mad_f32 v7, v5, v8, v7 ; D2820007 041E1105 s_movk_i32 s0, 0x44c ; B000044C v_add_i32_e32 v8, s0, v0 ; 4A100000 buffer_load_dword v8, v8, s[4:7], 0 offen ; E0301000 80010808 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v8, v16 ; 10102108 v_add_i32_e32 v12, s0, v11 ; 4A181600 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v12, v10, v8 ; D2820008 0422150C v_add_i32_e32 v12, s0, v14 ; 4A181C00 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v12, v13, v8 ; D2820008 04221B0C v_mad_f32 v7, v4, v8, v7 ; D2820007 041E1104 s_buffer_load_dword s0, s[4:7], 0x2d ; C200052D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s0, v7 ; 10100E00 s_buffer_load_dword s0, s[4:7], 0x2c ; C200052C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v6, s0, v8 ; D2820008 04200106 s_movk_i32 s0, 0x454 ; B0000454 v_add_i32_e32 v12, s0, v0 ; 4A180000 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v12, v16 ; 1018210C v_add_i32_e32 v17, s0, v11 ; 4A221600 buffer_load_dword v17, v17, s[4:7], 0 offen ; E0301000 80011111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v17, v10, v12 ; D282000C 04321511 v_add_i32_e32 v17, s0, v14 ; 4A221C00 buffer_load_dword v17, v17, s[4:7], 0 offen ; E0301000 80011111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v17, v13, v12 ; D282000C 04321B11 v_mul_f32_e32 v9, v12, v9 ; 1012130C s_movk_i32 s0, 0x450 ; B0000450 v_add_i32_e32 v12, s0, v0 ; 4A180000 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v12, v16 ; 1018210C v_add_i32_e32 v17, s0, v11 ; 4A221600 buffer_load_dword v17, v17, s[4:7], 0 offen ; E0301000 80011111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v17, v10, v12 ; D282000C 04321511 v_add_i32_e32 v17, s0, v14 ; 4A221C00 buffer_load_dword v17, v17, s[4:7], 0 offen ; E0301000 80011111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v17, v13, v12 ; D282000C 04321B11 v_mad_f32 v9, v15, v12, v9 ; D2820009 0426190F s_movk_i32 s0, 0x458 ; B0000458 v_add_i32_e32 v12, s0, v0 ; 4A180000 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v12, v16 ; 1018210C v_add_i32_e32 v15, s0, v11 ; 4A1E1600 buffer_load_dword v15, v15, s[4:7], 0 offen ; E0301000 80010F0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v15, v10, v12 ; D282000C 0432150F v_add_i32_e32 v15, s0, v14 ; 4A1E1C00 buffer_load_dword v15, v15, s[4:7], 0 offen ; E0301000 80010F0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v15, v13, v12 ; D282000C 04321B0F v_mad_f32 v5, v5, v12, v9 ; D2820005 04261905 s_movk_i32 s0, 0x45c ; B000045C v_add_i32_e32 v0, s0, v0 ; 4A000000 buffer_load_dword v0, v0, s[4:7], 0 offen ; E0301000 80010000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v16 ; 10002100 v_add_i32_e32 v9, s0, v11 ; 4A121600 buffer_load_dword v9, v9, s[4:7], 0 offen ; E0301000 80010909 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v9, v10, v0 ; D2820000 04021509 v_add_i32_e32 v9, s0, v14 ; 4A121C00 buffer_load_dword v9, v9, s[4:7], 0 offen ; E0301000 80010909 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v9, v13, v0 ; D2820000 04021B09 v_mad_f32 v0, v4, v0, v5 ; D2820000 04160104 s_buffer_load_dword s0, s[4:7], 0x2e ; C200052E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s0, v8 ; D2820001 04200100 s_buffer_load_dword s0, s[4:7], 0x2f ; C200052F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s0 ; 7E040200 v_mad_f32 v1, v2, s1, v1 ; D2820001 04040302 s_buffer_load_dword s0, s[4:7], 0x29 ; C2000529 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s0, v7 ; 10040E00 s_buffer_load_dword s0, s[4:7], 0x28 ; C2000528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v6, s0, v2 ; D2820002 04080106 s_buffer_load_dword s0, s[4:7], 0x2a ; C200052A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s0, v2 ; D2820002 04080100 s_buffer_load_dword s0, s[4:7], 0x2b ; C200052B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s0 ; 7E060200 v_mad_f32 v2, v3, s1, v2 ; D2820002 04080303 s_buffer_load_dword s0, s[4:7], 0x2 ; C2000502 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v2, s0, -v1 ; D2820003 84040102 s_buffer_load_dword s0, s[4:7], 0x25 ; C2000525 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s0, v7 ; 10080E00 s_buffer_load_dword s0, s[4:7], 0x24 ; C2000524 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s0, v4 ; D2820004 04100106 s_buffer_load_dword s0, s[4:7], 0x26 ; C2000526 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s0, v4 ; D2820004 04100100 s_buffer_load_dword s0, s[4:7], 0x27 ; C2000527 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s0 ; 7E0A0200 v_mad_f32 v4, v5, s1, v4 ; D2820004 04100305 s_movk_i32 s0, 0xed4 ; B0000ED4 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s0, v1, -v4 ; D2820005 84120200 s_buffer_load_dword s0, s[4:7], 0x21 ; C2000521 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v7 ; 100E0E00 s_buffer_load_dword s0, s[4:7], 0x20 ; C2000520 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v6, s0, v7 ; D2820006 041C0106 s_buffer_load_dword s0, s[4:7], 0x22 ; C2000522 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v6 ; D2820000 04180100 s_buffer_load_dword s0, s[4:7], 0x23 ; C2000523 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mad_f32 v0, v6, s1, v0 ; D2820000 04000306 s_movk_i32 s0, 0xed0 ; B0000ED0 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s0, v1, v0 ; D2820006 04020200 exp 15, 12, 0, 0, 0, v6, v5, v3, v1 ; F80000CF 01030506 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, v3 ; D2820003 040E0404 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v1, v3 ; D2820003 040E0204 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v4 ; 100C0804 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v0, v6 ; D2820006 041A0004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v4 ; 100E0804 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v0, v7 ; D2820007 041E0004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v2, v7 ; D2820007 041E0404 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v1, v7 ; D2820007 041E0204 exp 15, 13, 0, 0, 0, v7, v6, v5, v3 ; F80000DF 03050607 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, v3 ; D2820003 040E0404 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v1, v3 ; D2820003 040E0204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v4 ; 100C0804 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v0, v6 ; D2820006 041A0004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v0, v4 ; D2820000 04120004 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v2, v0 ; D2820000 04020404 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v1, v0 ; D2820000 04020200 exp 15, 14, 0, 1, 0, v0, v6, v5, v3 ; F80008EF 03050600 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: ADD TEMP[1].xyz, TEMP[0].wwww, -CONST[0].xxxx 3: FSLT TEMP[2].x, TEMP[1].xxxx, IMM[0].xxxx 4: FSLT TEMP[3].x, TEMP[1].yyyy, IMM[0].xxxx 5: OR TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 6: FSLT TEMP[1].x, TEMP[1].zzzz, IMM[0].xxxx 7: OR TEMP[1].x, TEMP[2].xxxx, TEMP[1].xxxx 8: UIF TEMP[1].xxxx :0 9: KILL 10: ENDIF 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %26 = load <8 x i32> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %28 = load <4 x i32> addrspace(2)* %27, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = bitcast float %29 to i32 %32 = bitcast float %30 to i32 %33 = insertelement <2 x i32> undef, i32 %31, i32 0 %34 = insertelement <2 x i32> %33, i32 %32, i32 1 %35 = bitcast <8 x i32> %26 to <32 x i8> %36 = bitcast <4 x i32> %28 to <16 x i8> %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %35, <16 x i8> %36, i32 2) %38 = extractelement <4 x float> %37, i32 0 %39 = extractelement <4 x float> %37, i32 1 %40 = extractelement <4 x float> %37, i32 2 %41 = extractelement <4 x float> %37, i32 3 %42 = fsub float -0.000000e+00, %24 %43 = fadd float %41, %42 %44 = fsub float -0.000000e+00, %24 %45 = fadd float %41, %44 %46 = fsub float -0.000000e+00, %24 %47 = fadd float %41, %46 %48 = fcmp olt float %43, 0.000000e+00 %49 = sext i1 %48 to i32 %50 = bitcast i32 %49 to float %51 = fcmp olt float %45, 0.000000e+00 %52 = sext i1 %51 to i32 %53 = bitcast i32 %52 to float %54 = bitcast float %50 to i32 %55 = bitcast float %53 to i32 %56 = or i32 %54, %55 %57 = bitcast i32 %56 to float %58 = fcmp olt float %47, 0.000000e+00 %59 = sext i1 %58 to i32 %60 = bitcast i32 %59 to float %61 = bitcast float %57 to i32 %62 = bitcast float %60 to i32 %63 = or i32 %61, %62 %64 = bitcast i32 %63 to float %65 = bitcast float %64 to i32 %66 = icmp ne i32 %65, 0 br i1 %66, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %67 = call i32 @llvm.SI.packf16(float %38, float %39) %68 = bitcast i32 %67 to float %69 = call i32 @llvm.SI.packf16(float %40, float %41) %70 = bitcast i32 %69 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %68, float %70, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430002 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v4, s0, v3 ; 0A080600 v_cmp_lt_f32_e64 s[0:1], v4, 0 ; D0020000 00010104 s_and_saveexec_b64 s[0:1], s[0:1] ; BE802400 s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[0:1] ; 88FE007E v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 FRAG 0: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } Shader Disassembly: v_mov_b32_e32 v0, 0 ; 7E000280 exp 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[9] DCL OUT[3], GENERIC[10] DCL OUT[4], GENERIC[11] DCL OUT[5], GENERIC[12] DCL OUT[6], GENERIC[13] DCL OUT[7], GENERIC[14] DCL OUT[8], GENERIC[15] DCL OUT[9], GENERIC[16] DCL OUT[10], GENERIC[17] DCL CONST[0..74] DCL TEMP[0..15], LOCAL IMM[0] FLT32 { 0.0000, -128.0000, 1.0000, -64.0000} IMM[1] FLT32 { -0.0159, 0.0159, 0.0001, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: ADD TEMP[1], IMM[0].yyyy, IN[1] 2: FSLT TEMP[2], TEMP[1], CONST[0].xxxx 3: AND TEMP[3], TEMP[2], IMM[0].zzzz 4: ABS TEMP[4], TEMP[1] 5: ADD TEMP[1], TEMP[4], -TEMP[3] 6: ADD TEMP[1], TEMP[1], IMM[0].wwww 7: MAD TEMP[3].xyz, TEMP[3].xzww, -CONST[0].zzzz, CONST[0].yyyy 8: FSLT TEMP[4], TEMP[1], CONST[0].xxxx 9: AND TEMP[4], TEMP[4], IMM[0].zzzz 10: ABS TEMP[5], TEMP[1] 11: ADD TEMP[1], TEMP[5], -TEMP[4] 12: MAD TEMP[5].xy, TEMP[1].xzzz, IMM[1].xxxx, IMM[0].zzzz 13: MAD TEMP[6].x, TEMP[1].yyyy, IMM[1].xxxx, TEMP[5].xxxx 14: MOV TEMP[5].w, TEMP[6].xxxx 15: MAD TEMP[6].x, TEMP[1].wwww, IMM[1].xxxx, TEMP[5].yyyy 16: MOV TEMP[6].z, TEMP[6].xxxx 17: MAD TEMP[4], TEMP[4], -CONST[0].zzzz, CONST[0].yyyy 18: MUL TEMP[1], TEMP[1], IMM[1].yyyy 19: MOV TEMP[5].xz, TEMP[1].xxyx 20: DP3 TEMP[7].x, TEMP[5].xzww, TEMP[5].xzww 21: RSQ TEMP[7].x, TEMP[7].xxxx 22: MUL TEMP[7].xyz, TEMP[5].xzww, TEMP[7].xxxx 23: MUL TEMP[5].xy, TEMP[4].xyyy, TEMP[7].xyyy 24: MUL TEMP[7].x, TEMP[3].xxxx, TEMP[7].zzzz 25: MOV TEMP[5].z, TEMP[7].xxxx 26: MUL TEMP[7].xyz, IN[4].xyzz, CONST[66].xxxx 27: ADD TEMP[5].xyz, TEMP[7].xyzz, TEMP[5].xyzz 28: DP3 TEMP[8].x, TEMP[5].xyzz, CONST[69].xyzz 29: MOV TEMP[8].z, TEMP[8].xxxx 30: MOV TEMP[6].xy, TEMP[1].zwzz 31: DP3 TEMP[9].x, TEMP[6].xyzz, TEMP[6].xyzz 32: RSQ TEMP[9].x, TEMP[9].xxxx 33: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[9].xxxx 34: MUL TEMP[1].xy, TEMP[4].zwww, TEMP[6].xyyy 35: MUL TEMP[6].x, TEMP[3].yyyy, TEMP[6].zzzz 36: MOV TEMP[1].w, TEMP[6].xxxx 37: ADD TEMP[1].xyz, TEMP[7].xyzz, TEMP[1].xyww 38: DP3 TEMP[6].x, TEMP[1].xyzz, CONST[69].xyzz 39: MOV TEMP[3].w, TEMP[6].xxxx 40: DP3 TEMP[8].x, TEMP[5].xyzz, CONST[67].xyzz 41: DP3 TEMP[6].x, TEMP[5].xyzz, CONST[68].xyzz 42: MOV TEMP[8].y, TEMP[6].xxxx 43: DP3 TEMP[6].x, TEMP[8].xyzz, TEMP[8].xyzz 44: RSQ TEMP[6].x, TEMP[6].xxxx 45: MUL TEMP[0].xyz, TEMP[8].xyzz, TEMP[6].xxxx 46: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[67].xyzz 47: DP3 TEMP[6].x, TEMP[1].xyzz, CONST[68].xyzz 48: MOV TEMP[3].y, TEMP[6].xxxx 49: DP3 TEMP[1].x, TEMP[3].xyww, TEMP[3].xyww 50: RSQ TEMP[6].x, TEMP[1].xxxx 51: MUL TEMP[6].xyz, TEMP[3].xyww, TEMP[6].xxxx 52: MOV TEMP[1].w, IN[0].wwww 53: MAD TEMP[1].xyz, IN[3].xyzz, CONST[66].xxxx, IN[0].xyzz 54: DP4 TEMP[7].x, TEMP[1], CONST[69] 55: MOV TEMP[4].z, TEMP[7].xxxx 56: MUL TEMP[5].xyz, TEMP[8].zxyy, TEMP[3].ywxx 57: MAD TEMP[5].xyz, TEMP[8].yzxx, TEMP[3].wxyy, -TEMP[5].xyzz 58: MUL TEMP[3].xyz, TEMP[3].zzzz, TEMP[5].xyzz 59: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz 60: RSQ TEMP[5].x, TEMP[5].xxxx 61: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx 62: DP4 TEMP[4].x, TEMP[1], CONST[67] 63: DP4 TEMP[7].x, TEMP[1], CONST[68] 64: MOV TEMP[4].y, TEMP[7].xxxx 65: ADD TEMP[1].xyz, -TEMP[4].xyzz, CONST[14].xyzz 66: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz 67: RSQ TEMP[7].x, TEMP[7].xxxx 68: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[7].xxxx 69: MOV TEMP[7].xyz, TEMP[4].xyzx 70: MOV TEMP[4].w, CONST[0].yyyy 71: DP4 TEMP[8].x, TEMP[4], CONST[57] 72: DP4 TEMP[9].x, TEMP[4], CONST[54] 73: MOV TEMP[1].y, TEMP[9].xxxx 74: DP4 TEMP[9].x, TEMP[4], CONST[55] 75: MOV TEMP[1].z, TEMP[9].xxxx 76: DP4 TEMP[9].x, TEMP[4], CONST[56] 77: MOV TEMP[1].w, TEMP[9].xxxx 78: RCP TEMP[3].x, TEMP[8].xxxx 79: MUL TEMP[9].xyz, TEMP[1].yzww, TEMP[3].xxxx 80: DP4 TEMP[10].x, TEMP[4], CONST[8] 81: MOV TEMP[1].y, TEMP[10].xxxx 82: DP4 TEMP[11].x, TEMP[4], CONST[9] 83: MOV TEMP[1].z, TEMP[11].xxxx 84: DP4 TEMP[12].x, TEMP[4], CONST[11] 85: MOV TEMP[1].w, TEMP[12].xxxx 86: MOV TEMP[13].xyw, TEMP[1].yzyw 87: MOV TEMP[9].w, TEMP[8].xxxx 88: DP4 TEMP[8].x, TEMP[4], CONST[10] 89: MOV TEMP[13].z, TEMP[8].xxxx 90: DP4 TEMP[14].x, TEMP[4], CONST[13] 91: MOV TEMP[7].w, TEMP[14].xxxx 92: DP4 TEMP[14].x, IN[2], CONST[48] 93: DP4 TEMP[15].x, IN[2], CONST[49] 94: MOV TEMP[14].y, TEMP[15].xxxx 95: DP4 TEMP[15].x, IN[2], CONST[52] 96: MOV TEMP[14].z, TEMP[15].xxxx 97: DP4 TEMP[15].x, IN[2], CONST[53] 98: MOV TEMP[14].w, TEMP[15].xxxx 99: RCP TEMP[1].x, TEMP[12].xxxx 100: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[1].xxxx 101: MOV TEMP[5].w, TEMP[11].xxxx 102: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[1].xxxx 103: MOV TEMP[6].w, TEMP[10].xxxx 104: MUL TEMP[3].xyz, CONST[15].xyzz, CONST[58].xyzz 105: UIF CONST[71].xxxx :0 106: ENDIF 107: UIF CONST[72].xxxx :0 108: ENDIF 109: UIF CONST[73].xxxx :0 110: ENDIF 111: UIF CONST[74].xxxx :0 112: ENDIF 113: ADD TEMP[1].xyz, -TEMP[4].xyzz, CONST[2].xyzz 114: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 115: RSQ TEMP[4].x, TEMP[1].xxxx 116: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].xxxx 117: CMP TEMP[1].x, -TEMP[1].xxxx, TEMP[4].xxxx, IMM[0].xxxx 118: MAD TEMP[1].x, TEMP[1].xxxx, CONST[16].wwww, CONST[16].xxxx 119: MOV_SAT TEMP[1].x, TEMP[1].xxxx 120: MIN TEMP[4].x, TEMP[1].xxxx, CONST[16].zzzz 121: MOV TEMP[3].w, TEMP[4].xxxx 122: MUL TEMP[1], CONST[15].wwww, CONST[0].yyyx 123: MOV TEMP[2].w, CONST[0].xxxx 124: MOV TEMP[4], TEMP[13] 125: MAD TEMP[8].x, TEMP[8].xxxx, CONST[0].zzzz, -TEMP[13].wwww 126: MOV TEMP[13].z, TEMP[8].xxxx 127: MOV TEMP[13].y, -TEMP[13].yyyy 128: MAD TEMP[13].xy, CONST[70].xyyy, TEMP[13].wwww, TEMP[13].xyyy 129: MOV OUT[10], TEMP[9] 130: MOV OUT[2], TEMP[14] 131: MOV OUT[3], TEMP[1] 132: MOV OUT[4], TEMP[2] 133: MOV OUT[5], TEMP[3] 134: MOV OUT[6], TEMP[7] 135: MOV OUT[0], TEMP[13] 136: MOV OUT[7], TEMP[6] 137: MOV OUT[1], TEMP[4] 138: MOV OUT[8], TEMP[5] 139: MOV OUT[9], TEMP[0] 140: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 832) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 836) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 840) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 844) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 848) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 852) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 856) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 860) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 864) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 868) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 872) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 876) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 880) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 884) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 888) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 892) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 896) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 900) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 904) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 908) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 912) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 916) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 920) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 924) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 928) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 932) %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 936) %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1056) %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1072) %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1076) %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1080) %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1084) %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1088) %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1092) %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1096) %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1100) %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1104) %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1108) %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1112) %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1116) %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1120) %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1124) %99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1152) %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1168) %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1184) %102 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %103 = load <16 x i8> addrspace(2)* %102, !tbaa !0 %104 = add i32 %5, %7 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = extractelement <4 x float> %105, i32 2 %109 = extractelement <4 x float> %105, i32 3 %110 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %111 = load <16 x i8> addrspace(2)* %110, !tbaa !0 %112 = add i32 %5, %7 %113 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %111, i32 0, i32 %112) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %119 = load <16 x i8> addrspace(2)* %118, !tbaa !0 %120 = add i32 %5, %7 %121 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %119, i32 0, i32 %120) %122 = extractelement <4 x float> %121, i32 0 %123 = extractelement <4 x float> %121, i32 1 %124 = extractelement <4 x float> %121, i32 2 %125 = extractelement <4 x float> %121, i32 3 %126 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %127 = load <16 x i8> addrspace(2)* %126, !tbaa !0 %128 = add i32 %5, %7 %129 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %127, i32 0, i32 %128) %130 = extractelement <4 x float> %129, i32 0 %131 = extractelement <4 x float> %129, i32 1 %132 = extractelement <4 x float> %129, i32 2 %133 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %134 = load <16 x i8> addrspace(2)* %133, !tbaa !0 %135 = add i32 %5, %7 %136 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %134, i32 0, i32 %135) %137 = extractelement <4 x float> %136, i32 0 %138 = extractelement <4 x float> %136, i32 1 %139 = extractelement <4 x float> %136, i32 2 %140 = fadd float -1.280000e+02, %114 %141 = fadd float -1.280000e+02, %115 %142 = fadd float -1.280000e+02, %116 %143 = fadd float -1.280000e+02, %117 %144 = fcmp olt float %140, %13 %145 = sext i1 %144 to i32 %146 = fcmp olt float %141, %13 %147 = sext i1 %146 to i32 %148 = fcmp olt float %142, %13 %149 = sext i1 %148 to i32 %150 = fcmp olt float %143, %13 %151 = sext i1 %150 to i32 %152 = bitcast i32 %145 to float %153 = bitcast i32 %147 to float %154 = bitcast i32 %149 to float %155 = bitcast i32 %151 to float %156 = bitcast float %152 to i32 %157 = and i32 %156, 1065353216 %158 = bitcast float %153 to i32 %159 = and i32 %158, 1065353216 %160 = bitcast float %154 to i32 %161 = and i32 %160, 1065353216 %162 = bitcast float %155 to i32 %163 = and i32 %162, 1065353216 %164 = bitcast i32 %157 to float %165 = bitcast i32 %159 to float %166 = bitcast i32 %161 to float %167 = bitcast i32 %163 to float %168 = call float @fabs(float %140) %169 = call float @fabs(float %141) %170 = call float @fabs(float %142) %171 = call float @fabs(float %143) %172 = fsub float -0.000000e+00, %164 %173 = fadd float %168, %172 %174 = fsub float -0.000000e+00, %165 %175 = fadd float %169, %174 %176 = fsub float -0.000000e+00, %166 %177 = fadd float %170, %176 %178 = fsub float -0.000000e+00, %167 %179 = fadd float %171, %178 %180 = fadd float %173, -6.400000e+01 %181 = fadd float %175, -6.400000e+01 %182 = fadd float %177, -6.400000e+01 %183 = fadd float %179, -6.400000e+01 %184 = fsub float -0.000000e+00, %15 %185 = fmul float %164, %184 %186 = fadd float %185, %14 %187 = fsub float -0.000000e+00, %15 %188 = fmul float %166, %187 %189 = fadd float %188, %14 %190 = fsub float -0.000000e+00, %15 %191 = fmul float %167, %190 %192 = fadd float %191, %14 %193 = fcmp olt float %180, %13 %194 = sext i1 %193 to i32 %195 = fcmp olt float %181, %13 %196 = sext i1 %195 to i32 %197 = fcmp olt float %182, %13 %198 = sext i1 %197 to i32 %199 = fcmp olt float %183, %13 %200 = sext i1 %199 to i32 %201 = bitcast i32 %194 to float %202 = bitcast i32 %196 to float %203 = bitcast i32 %198 to float %204 = bitcast i32 %200 to float %205 = bitcast float %201 to i32 %206 = and i32 %205, 1065353216 %207 = bitcast float %202 to i32 %208 = and i32 %207, 1065353216 %209 = bitcast float %203 to i32 %210 = and i32 %209, 1065353216 %211 = bitcast float %204 to i32 %212 = and i32 %211, 1065353216 %213 = bitcast i32 %206 to float %214 = bitcast i32 %208 to float %215 = bitcast i32 %210 to float %216 = bitcast i32 %212 to float %217 = call float @fabs(float %180) %218 = call float @fabs(float %181) %219 = call float @fabs(float %182) %220 = call float @fabs(float %183) %221 = fsub float -0.000000e+00, %213 %222 = fadd float %217, %221 %223 = fsub float -0.000000e+00, %214 %224 = fadd float %218, %223 %225 = fsub float -0.000000e+00, %215 %226 = fadd float %219, %225 %227 = fsub float -0.000000e+00, %216 %228 = fadd float %220, %227 %229 = fmul float %222, 0xBF90410420000000 %230 = fadd float %229, 1.000000e+00 %231 = fmul float %226, 0xBF90410420000000 %232 = fadd float %231, 1.000000e+00 %233 = fmul float %224, 0xBF90410420000000 %234 = fadd float %233, %230 %235 = fmul float %228, 0xBF90410420000000 %236 = fadd float %235, %232 %237 = fsub float -0.000000e+00, %15 %238 = fmul float %213, %237 %239 = fadd float %238, %14 %240 = fsub float -0.000000e+00, %15 %241 = fmul float %214, %240 %242 = fadd float %241, %14 %243 = fsub float -0.000000e+00, %15 %244 = fmul float %215, %243 %245 = fadd float %244, %14 %246 = fsub float -0.000000e+00, %15 %247 = fmul float %216, %246 %248 = fadd float %247, %14 %249 = fmul float %222, 0x3F90410420000000 %250 = fmul float %224, 0x3F90410420000000 %251 = fmul float %226, 0x3F90410420000000 %252 = fmul float %228, 0x3F90410420000000 %253 = fmul float %249, %249 %254 = fmul float %250, %250 %255 = fadd float %254, %253 %256 = fmul float %234, %234 %257 = fadd float %255, %256 %258 = call float @llvm.AMDGPU.rsq.clamped.f32(float %257) %259 = fmul float %249, %258 %260 = fmul float %250, %258 %261 = fmul float %234, %258 %262 = fmul float %239, %259 %263 = fmul float %242, %260 %264 = fmul float %186, %261 %265 = fmul float %137, %84 %266 = fmul float %138, %84 %267 = fmul float %139, %84 %268 = fadd float %265, %262 %269 = fadd float %266, %263 %270 = fadd float %267, %264 %271 = fmul float %268, %93 %272 = fmul float %269, %94 %273 = fadd float %272, %271 %274 = fmul float %270, %95 %275 = fadd float %273, %274 %276 = fmul float %251, %251 %277 = fmul float %252, %252 %278 = fadd float %277, %276 %279 = fmul float %236, %236 %280 = fadd float %278, %279 %281 = call float @llvm.AMDGPU.rsq.clamped.f32(float %280) %282 = fmul float %251, %281 %283 = fmul float %252, %281 %284 = fmul float %236, %281 %285 = fmul float %245, %282 %286 = fmul float %248, %283 %287 = fmul float %189, %284 %288 = fadd float %265, %285 %289 = fadd float %266, %286 %290 = fadd float %267, %287 %291 = fmul float %288, %93 %292 = fmul float %289, %94 %293 = fadd float %292, %291 %294 = fmul float %290, %95 %295 = fadd float %293, %294 %296 = fmul float %268, %85 %297 = fmul float %269, %86 %298 = fadd float %297, %296 %299 = fmul float %270, %87 %300 = fadd float %298, %299 %301 = fmul float %268, %89 %302 = fmul float %269, %90 %303 = fadd float %302, %301 %304 = fmul float %270, %91 %305 = fadd float %303, %304 %306 = fmul float %300, %300 %307 = fmul float %305, %305 %308 = fadd float %307, %306 %309 = fmul float %275, %275 %310 = fadd float %308, %309 %311 = call float @llvm.AMDGPU.rsq.clamped.f32(float %310) %312 = fmul float %300, %311 %313 = fmul float %305, %311 %314 = fmul float %275, %311 %315 = fmul float %288, %85 %316 = fmul float %289, %86 %317 = fadd float %316, %315 %318 = fmul float %290, %87 %319 = fadd float %317, %318 %320 = fmul float %288, %89 %321 = fmul float %289, %90 %322 = fadd float %321, %320 %323 = fmul float %290, %91 %324 = fadd float %322, %323 %325 = fmul float %319, %319 %326 = fmul float %324, %324 %327 = fadd float %326, %325 %328 = fmul float %295, %295 %329 = fadd float %327, %328 %330 = call float @llvm.AMDGPU.rsq.clamped.f32(float %329) %331 = fmul float %319, %330 %332 = fmul float %324, %330 %333 = fmul float %295, %330 %334 = fmul float %130, %84 %335 = fadd float %334, %106 %336 = fmul float %131, %84 %337 = fadd float %336, %107 %338 = fmul float %132, %84 %339 = fadd float %338, %108 %340 = fmul float %335, %93 %341 = fmul float %337, %94 %342 = fadd float %340, %341 %343 = fmul float %339, %95 %344 = fadd float %342, %343 %345 = fmul float %109, %96 %346 = fadd float %344, %345 %347 = fmul float %275, %324 %348 = fmul float %300, %295 %349 = fmul float %305, %319 %350 = fsub float -0.000000e+00, %347 %351 = fmul float %305, %295 %352 = fadd float %351, %350 %353 = fsub float -0.000000e+00, %348 %354 = fmul float %275, %319 %355 = fadd float %354, %353 %356 = fsub float -0.000000e+00, %349 %357 = fmul float %300, %324 %358 = fadd float %357, %356 %359 = fmul float %192, %352 %360 = fmul float %192, %355 %361 = fmul float %192, %358 %362 = fmul float %359, %359 %363 = fmul float %360, %360 %364 = fadd float %363, %362 %365 = fmul float %361, %361 %366 = fadd float %364, %365 %367 = call float @llvm.AMDGPU.rsq.clamped.f32(float %366) %368 = fmul float %359, %367 %369 = fmul float %360, %367 %370 = fmul float %361, %367 %371 = fmul float %335, %85 %372 = fmul float %337, %86 %373 = fadd float %371, %372 %374 = fmul float %339, %87 %375 = fadd float %373, %374 %376 = fmul float %109, %88 %377 = fadd float %375, %376 %378 = fmul float %335, %89 %379 = fmul float %337, %90 %380 = fadd float %378, %379 %381 = fmul float %339, %91 %382 = fadd float %380, %381 %383 = fmul float %109, %92 %384 = fadd float %382, %383 %385 = fsub float -0.000000e+00, %377 %386 = fadd float %385, %39 %387 = fsub float -0.000000e+00, %384 %388 = fadd float %387, %40 %389 = fsub float -0.000000e+00, %346 %390 = fadd float %389, %41 %391 = fmul float %386, %386 %392 = fmul float %388, %388 %393 = fadd float %392, %391 %394 = fmul float %390, %390 %395 = fadd float %393, %394 %396 = call float @llvm.AMDGPU.rsq.clamped.f32(float %395) %397 = fmul float %386, %396 %398 = fmul float %388, %396 %399 = fmul float %390, %396 %400 = fmul float %377, %77 %401 = fmul float %384, %78 %402 = fadd float %400, %401 %403 = fmul float %346, %79 %404 = fadd float %402, %403 %405 = fmul float %14, %80 %406 = fadd float %404, %405 %407 = fmul float %377, %65 %408 = fmul float %384, %66 %409 = fadd float %407, %408 %410 = fmul float %346, %67 %411 = fadd float %409, %410 %412 = fmul float %14, %68 %413 = fadd float %411, %412 %414 = fmul float %377, %69 %415 = fmul float %384, %70 %416 = fadd float %414, %415 %417 = fmul float %346, %71 %418 = fadd float %416, %417 %419 = fmul float %14, %72 %420 = fadd float %418, %419 %421 = fmul float %377, %73 %422 = fmul float %384, %74 %423 = fadd float %421, %422 %424 = fmul float %346, %75 %425 = fadd float %423, %424 %426 = fmul float %14, %76 %427 = fadd float %425, %426 %428 = fdiv float 1.000000e+00, %406 %429 = fmul float %413, %428 %430 = fmul float %420, %428 %431 = fmul float %427, %428 %432 = fmul float %377, %19 %433 = fmul float %384, %20 %434 = fadd float %432, %433 %435 = fmul float %346, %21 %436 = fadd float %434, %435 %437 = fmul float %14, %22 %438 = fadd float %436, %437 %439 = fmul float %377, %23 %440 = fmul float %384, %24 %441 = fadd float %439, %440 %442 = fmul float %346, %25 %443 = fadd float %441, %442 %444 = fmul float %14, %26 %445 = fadd float %443, %444 %446 = fmul float %377, %31 %447 = fmul float %384, %32 %448 = fadd float %446, %447 %449 = fmul float %346, %33 %450 = fadd float %448, %449 %451 = fmul float %14, %34 %452 = fadd float %450, %451 %453 = fmul float %377, %27 %454 = fmul float %384, %28 %455 = fadd float %453, %454 %456 = fmul float %346, %29 %457 = fadd float %455, %456 %458 = fmul float %14, %30 %459 = fadd float %457, %458 %460 = fmul float %377, %35 %461 = fmul float %384, %36 %462 = fadd float %460, %461 %463 = fmul float %346, %37 %464 = fadd float %462, %463 %465 = fmul float %14, %38 %466 = fadd float %464, %465 %467 = fmul float %122, %49 %468 = fmul float %123, %50 %469 = fadd float %467, %468 %470 = fmul float %124, %51 %471 = fadd float %469, %470 %472 = fmul float %125, %52 %473 = fadd float %471, %472 %474 = fmul float %122, %53 %475 = fmul float %123, %54 %476 = fadd float %474, %475 %477 = fmul float %124, %55 %478 = fadd float %476, %477 %479 = fmul float %125, %56 %480 = fadd float %478, %479 %481 = fmul float %122, %57 %482 = fmul float %123, %58 %483 = fadd float %481, %482 %484 = fmul float %124, %59 %485 = fadd float %483, %484 %486 = fmul float %125, %60 %487 = fadd float %485, %486 %488 = fmul float %122, %61 %489 = fmul float %123, %62 %490 = fadd float %488, %489 %491 = fmul float %124, %63 %492 = fadd float %490, %491 %493 = fmul float %125, %64 %494 = fadd float %492, %493 %495 = fdiv float 1.000000e+00, %452 %496 = fmul float %445, %495 %497 = fmul float %438, %495 %498 = fmul float %42, %81 %499 = fmul float %43, %82 %500 = fmul float %44, %83 %501 = bitcast float %99 to i32 %502 = icmp ne i32 %501, 0 %503 = bitcast float %100 to i32 %504 = icmp ne i32 %503, 0 %505 = bitcast float %101 to i32 %506 = icmp ne i32 %505, 0 %507 = fsub float -0.000000e+00, %377 %508 = fadd float %507, %16 %509 = fsub float -0.000000e+00, %384 %510 = fadd float %509, %17 %511 = fsub float -0.000000e+00, %346 %512 = fadd float %511, %18 %513 = fmul float %508, %508 %514 = fmul float %510, %510 %515 = fadd float %514, %513 %516 = fmul float %512, %512 %517 = fadd float %515, %516 %518 = call float @llvm.AMDGPU.rsq.clamped.f32(float %517) %519 = fmul float %518, %517 %520 = fsub float -0.000000e+00, %517 %521 = call float @llvm.AMDGPU.cndlt(float %520, float %519, float 0.000000e+00) %522 = fmul float %521, %48 %523 = fadd float %522, %46 %524 = call float @llvm.AMDIL.clamp.(float %523, float 0.000000e+00, float 1.000000e+00) %525 = call float @llvm.minnum.f32(float %524, float %47) %526 = fmul float %45, %14 %527 = fmul float %45, %14 %528 = fmul float %45, %14 %529 = fmul float %45, %13 %530 = fsub float -0.000000e+00, %452 %531 = fmul float %459, %15 %532 = fadd float %531, %530 %533 = fsub float -0.000000e+00, %445 %534 = fmul float %97, %452 %535 = fadd float %534, %438 %536 = fmul float %98, %452 %537 = fadd float %536, %533 %538 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 16 %539 = load <16 x i8> addrspace(2)* %538, !tbaa !0 %540 = call float @llvm.SI.load.const(<16 x i8> %539, i32 0) %541 = fmul float %540, %438 %542 = call float @llvm.SI.load.const(<16 x i8> %539, i32 4) %543 = fmul float %542, %445 %544 = fadd float %541, %543 %545 = call float @llvm.SI.load.const(<16 x i8> %539, i32 8) %546 = fmul float %545, %459 %547 = fadd float %544, %546 %548 = call float @llvm.SI.load.const(<16 x i8> %539, i32 12) %549 = fmul float %548, %452 %550 = fadd float %547, %549 %551 = call float @llvm.SI.load.const(<16 x i8> %539, i32 16) %552 = fmul float %551, %438 %553 = call float @llvm.SI.load.const(<16 x i8> %539, i32 20) %554 = fmul float %553, %445 %555 = fadd float %552, %554 %556 = call float @llvm.SI.load.const(<16 x i8> %539, i32 24) %557 = fmul float %556, %459 %558 = fadd float %555, %557 %559 = call float @llvm.SI.load.const(<16 x i8> %539, i32 28) %560 = fmul float %559, %452 %561 = fadd float %558, %560 %562 = call float @llvm.SI.load.const(<16 x i8> %539, i32 32) %563 = fmul float %562, %438 %564 = call float @llvm.SI.load.const(<16 x i8> %539, i32 36) %565 = fmul float %564, %445 %566 = fadd float %563, %565 %567 = call float @llvm.SI.load.const(<16 x i8> %539, i32 40) %568 = fmul float %567, %459 %569 = fadd float %566, %568 %570 = call float @llvm.SI.load.const(<16 x i8> %539, i32 44) %571 = fmul float %570, %452 %572 = fadd float %569, %571 %573 = call float @llvm.SI.load.const(<16 x i8> %539, i32 48) %574 = fmul float %573, %438 %575 = call float @llvm.SI.load.const(<16 x i8> %539, i32 52) %576 = fmul float %575, %445 %577 = fadd float %574, %576 %578 = call float @llvm.SI.load.const(<16 x i8> %539, i32 56) %579 = fmul float %578, %459 %580 = fadd float %577, %579 %581 = call float @llvm.SI.load.const(<16 x i8> %539, i32 60) %582 = fmul float %581, %452 %583 = fadd float %580, %582 %584 = call float @llvm.SI.load.const(<16 x i8> %539, i32 64) %585 = fmul float %584, %438 %586 = call float @llvm.SI.load.const(<16 x i8> %539, i32 68) %587 = fmul float %586, %445 %588 = fadd float %585, %587 %589 = call float @llvm.SI.load.const(<16 x i8> %539, i32 72) %590 = fmul float %589, %459 %591 = fadd float %588, %590 %592 = call float @llvm.SI.load.const(<16 x i8> %539, i32 76) %593 = fmul float %592, %452 %594 = fadd float %591, %593 %595 = call float @llvm.SI.load.const(<16 x i8> %539, i32 80) %596 = fmul float %595, %438 %597 = call float @llvm.SI.load.const(<16 x i8> %539, i32 84) %598 = fmul float %597, %445 %599 = fadd float %596, %598 %600 = call float @llvm.SI.load.const(<16 x i8> %539, i32 88) %601 = fmul float %600, %459 %602 = fadd float %599, %601 %603 = call float @llvm.SI.load.const(<16 x i8> %539, i32 92) %604 = fmul float %603, %452 %605 = fadd float %602, %604 %606 = call float @llvm.SI.load.const(<16 x i8> %539, i32 96) %607 = fmul float %606, %438 %608 = call float @llvm.SI.load.const(<16 x i8> %539, i32 100) %609 = fmul float %608, %445 %610 = fadd float %607, %609 %611 = call float @llvm.SI.load.const(<16 x i8> %539, i32 104) %612 = fmul float %611, %459 %613 = fadd float %610, %612 %614 = call float @llvm.SI.load.const(<16 x i8> %539, i32 108) %615 = fmul float %614, %452 %616 = fadd float %613, %615 %617 = call float @llvm.SI.load.const(<16 x i8> %539, i32 112) %618 = fmul float %617, %438 %619 = call float @llvm.SI.load.const(<16 x i8> %539, i32 116) %620 = fmul float %619, %445 %621 = fadd float %618, %620 %622 = call float @llvm.SI.load.const(<16 x i8> %539, i32 120) %623 = fmul float %622, %459 %624 = fadd float %621, %623 %625 = call float @llvm.SI.load.const(<16 x i8> %539, i32 124) %626 = fmul float %625, %452 %627 = fadd float %624, %626 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %473, float %480, float %487, float %494) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %526, float %527, float %528, float %529) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %397, float %398, float %399, float %13) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %498, float %499, float %500, float %525) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %377, float %384, float %346, float %466) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %331, float %332, float %333, float %497) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %368, float %369, float %370, float %496) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %312, float %313, float %314, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %429, float %430, float %431, float %406) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %535, float %537, float %532, float %452) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %550, float %561, float %572, float %583) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %594, float %605, float %616, float %627) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[20:23], s[8:9], 0x4 ; C08A0904 s_load_dwordx4 s[4:7], s[8:9], 0x8 ; C0820908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[16:19], s[8:9], 0x10 ; C0880910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s0, s[4:7], 0xd5 ; C20005D5 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s0, v2 ; 100A0400 s_buffer_load_dword s0, s[4:7], 0xd4 ; C20005D4 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s0, v5 ; D2820005 04140101 s_buffer_load_dword s0, s[4:7], 0xd6 ; C20005D6 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s0, v5 ; D2820005 04140103 s_buffer_load_dword s0, s[4:7], 0xd7 ; C20005D7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s0, v5 ; D2820005 04140104 s_buffer_load_dword s0, s[4:7], 0xd1 ; C20005D1 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s0, v2 ; 100C0400 s_buffer_load_dword s0, s[4:7], 0xd0 ; C20005D0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s0, v6 ; D2820006 04180101 s_buffer_load_dword s0, s[4:7], 0xd2 ; C20005D2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s0, v6 ; D2820006 04180103 s_buffer_load_dword s0, s[4:7], 0xd3 ; C20005D3 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s0, v6 ; D2820006 04180104 s_buffer_load_dword s0, s[4:7], 0xc5 ; C20005C5 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v2 ; 100E0400 s_buffer_load_dword s0, s[4:7], 0xc4 ; C20005C4 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v1, s0, v7 ; D2820007 041C0101 s_buffer_load_dword s0, s[4:7], 0xc6 ; C20005C6 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v3, s0, v7 ; D2820007 041C0103 s_buffer_load_dword s0, s[4:7], 0xc7 ; C20005C7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v4, s0, v7 ; D2820007 041C0104 s_buffer_load_dword s0, s[4:7], 0xc1 ; C20005C1 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s0, v2 ; 10100400 s_buffer_load_dword s0, s[4:7], 0xc0 ; C20005C0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v1, s0, v8 ; D2820008 04200101 s_buffer_load_dword s0, s[4:7], 0xc2 ; C20005C2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v3, s0, v8 ; D2820008 04200103 s_buffer_load_dword s0, s[4:7], 0xc3 ; C20005C3 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s0, v8 ; D2820001 04200104 exp 15, 32, 0, 0, 0, v1, v7, v6, v5 ; F800020F 05060701 s_buffer_load_dword s11, s[4:7], 0x0 ; C2058500 s_buffer_load_dword s1, s[4:7], 0x3f ; C200853F s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s11 ; 7E02020B v_mul_f32_e32 v1, s1, v1 ; 10020201 s_buffer_load_dword s0, s[4:7], 0x1 ; C2000501 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s0 ; 7E040200 v_mul_f32_e32 v2, s1, v2 ; 10040401 exp 15, 33, 0, 0, 0, v2, v2, v2, v1 ; F800021F 01020202 s_movk_i32 s1, 0x420 ; B0010420 s_buffer_load_dword s8, s[4:7], s1 ; C2040401 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 buffer_load_format_xyzw v[5:8], v0, s[24:27], 0 idxen ; E00C2000 80060500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v9, v5, s8, v1 ; D2820009 04041105 v_mad_f32 v10, v6, s8, v2 ; D282000A 04081106 s_movk_i32 s1, 0x444 ; B0010444 s_buffer_load_dword s1, s[4:7], s1 ; C2008401 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s1, v10 ; 10161401 s_movk_i32 s9, 0x440 ; B0090440 s_buffer_load_dword s9, s[4:7], s9 ; C2048409 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, v9, s9, v11 ; D282000B 042C1309 v_mad_f32 v5, v7, s8, v3 ; D2820005 040C1107 s_movk_i32 s10, 0x448 ; B00A0448 s_buffer_load_dword s10, s[4:7], s10 ; C205040A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s10, v11 ; D2820006 042C1505 s_movk_i32 s12, 0x44c ; B00C044C s_buffer_load_dword s12, s[4:7], s12 ; C206040C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s12, v6 ; D2820006 04181904 s_buffer_load_dword s12, s[4:7], 0x39 ; C2060539 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v7, s12, v6 ; 080E0C0C s_movk_i32 s12, 0x434 ; B00C0434 s_buffer_load_dword s12, s[4:7], s12 ; C206040C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s12, v10 ; 1010140C s_movk_i32 s13, 0x430 ; B00D0430 s_buffer_load_dword s14, s[4:7], s13 ; C207040D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v9, s14, v8 ; D2820008 04201D09 s_movk_i32 s13, 0x438 ; B00D0438 s_buffer_load_dword s13, s[4:7], s13 ; C206840D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v5, s13, v8 ; D2820008 04201B05 s_movk_i32 s15, 0x43c ; B00F043C s_buffer_load_dword s15, s[4:7], s15 ; C207840F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v4, s15, v8 ; D2820008 04201F04 s_buffer_load_dword s15, s[4:7], 0x38 ; C2078538 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v11, s15, v8 ; 0816100F v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mad_f32 v12, v7, v7, v12 ; D282000C 04320F07 s_movk_i32 s15, 0x454 ; B00F0454 s_buffer_load_dword s15, s[4:7], s15 ; C207840F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s15, v10 ; 1014140F s_movk_i32 s24, 0x450 ; B0180450 s_buffer_load_dword s24, s[4:7], s24 ; C20C0418 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v9, s24, v10 ; D2820009 04283109 s_movk_i32 s25, 0x458 ; B0190458 s_buffer_load_dword s25, s[4:7], s25 ; C20C8419 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v5, s25, v9 ; D2820005 04243305 s_movk_i32 s26, 0x45c ; B01A045C s_buffer_load_dword s26, s[4:7], s26 ; C20D041A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s26, v5 ; D2820001 04143504 s_buffer_load_dword s26, s[4:7], 0x3a ; C20D053A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v2, s26, v1 ; 0804021A v_mad_f32 v3, v2, v2, v12 ; D2820003 04320502 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v4, v3, v7 ; 10080F03 v_mul_f32_e32 v3, v3, v11 ; 10061703 v_mov_b32_e32 v5, s11 ; 7E0A020B exp 15, 34, 0, 0, 0, v3, v4, v2, v5 ; F800022F 05020403 s_buffer_load_dword s26, s[4:7], 0x9 ; C20D0509 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_sub_f32_e32 v2, s26, v6 ; 08040C1A s_buffer_load_dword s26, s[4:7], 0x8 ; C20D0508 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s26, v8 ; 0806101A v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v2, v2, v2, v3 ; D2820002 040E0502 s_buffer_load_dword s26, s[4:7], 0xa ; C20D050A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s26, v1 ; 0806021A v_mad_f32 v2, v3, v3, v2 ; D2820002 040A0703 v_rsq_clamp_f32_e32 v3, v2 ; 7E065902 v_mul_f32_e32 v3, v2, v3 ; 10060702 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_mov_b32_e32 v2, 0 ; 7E040280 v_cndmask_b32_e64 v3, 0, v3, vcc ; D2000803 01AA0680 s_buffer_load_dword s26, s[4:7], 0x40 ; C20D0540 s_buffer_load_dword s27, s[4:7], 0x43 ; C20D8543 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s26 ; 7E08021A v_mad_f32 v3, s27, v3, v4 ; D2820003 0412061B v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 s_buffer_load_dword s26, s[4:7], 0x42 ; C20D0542 s_waitcnt lgkmcnt(0) ; BF8C007F v_min_f32_e32 v3, s26, v3 ; 1E06061A s_buffer_load_dword s26, s[4:7], 0xea ; C20D05EA s_buffer_load_dword s27, s[4:7], 0x3e ; C20D853E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s26 ; 7E08021A v_mul_f32_e32 v4, s27, v4 ; 1008081B s_buffer_load_dword s26, s[4:7], 0xe9 ; C20D05E9 s_buffer_load_dword s27, s[4:7], 0x3d ; C20D853D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s26 ; 7E0A021A v_mul_f32_e32 v5, s27, v5 ; 100A0A1B s_buffer_load_dword s26, s[4:7], 0xe8 ; C20D05E8 s_buffer_load_dword s27, s[4:7], 0x3c ; C20D853C s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s26 ; 7E0E021A v_mul_f32_e32 v7, s27, v7 ; 100E0E1B exp 15, 35, 0, 0, 0, v7, v5, v4, v3 ; F800023F 03040507 s_buffer_load_dword s26, s[4:7], 0x35 ; C20D0535 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v3, s26, v6 ; 10060C1A s_buffer_load_dword s26, s[4:7], 0x34 ; C20D0534 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v8, s26, v3 ; D2820003 040C3508 s_buffer_load_dword s26, s[4:7], 0x36 ; C20D0536 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v1, s26, v3 ; D2820003 040C3501 s_buffer_load_dword s26, s[4:7], 0x37 ; C20D0537 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s26 ; 7E08021A v_mad_f32 v3, v4, s0, v3 ; D2820003 040C0104 exp 15, 36, 0, 0, 0, v8, v6, v1, v3 ; F800024F 03010608 buffer_load_format_xyzw v[9:12], v0, s[20:23], 0 idxen ; E00C2000 80050900 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v3, 0xc3000000 ; 7E0602FF C3000000 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v3, v11 ; 06081703 v_cmp_lt_f32_e64 s[20:21], v4, s11 ; D0020014 00001704 v_cndmask_b32_e64 v5, 0, -1, s[20:21] ; D2000805 00518280 v_and_b32_e32 v5, 1.0, v5 ; 360A0AF2 v_sub_f32_e64 v4, |v4|, v5 ; D2080104 00020B04 v_mov_b32_e32 v7, 0xc2800000 ; 7E0E02FF C2800000 v_add_f32_e32 v4, v7, v4 ; 06080907 v_cmp_lt_f32_e64 s[20:21], v4, s11 ; D0020014 00001704 v_cndmask_b32_e64 v13, 0, -1, s[20:21] ; D200080D 00518280 v_and_b32_e32 v13, 1.0, v13 ; 361A1AF2 v_sub_f32_e64 v4, |v4|, v13 ; D2080104 00021B04 v_mov_b32_e32 v14, 0xbc820821 ; 7E1C02FF BC820821 v_mad_f32 v15, v4, v14, 1.0 ; D282000F 03CA1D04 v_add_f32_e32 v16, v3, v12 ; 06201903 v_cmp_lt_f32_e64 s[20:21], v16, s11 ; D0020014 00001710 v_cndmask_b32_e64 v17, 0, -1, s[20:21] ; D2000811 00518280 v_and_b32_e32 v17, 1.0, v17 ; 362222F2 v_sub_f32_e64 v16, |v16|, v17 ; D2080110 00022310 v_add_f32_e32 v16, v7, v16 ; 06202107 v_cmp_lt_f32_e64 s[20:21], v16, s11 ; D0020014 00001710 v_cndmask_b32_e64 v18, 0, -1, s[20:21] ; D2000012 00518280 v_and_b32_e32 v18, 1.0, v18 ; 362424F2 v_sub_f32_e64 v16, |v16|, v18 ; D2080110 00022510 v_mad_f32 v15, v16, v14, v15 ; D282000F 043E1D10 v_mul_f32_e32 v16, 0x3c820821, v16 ; 102020FF 3C820821 v_mul_f32_e32 v4, 0x3c820821, v4 ; 100808FF 3C820821 v_mul_f32_e32 v19, v4, v4 ; 10260904 v_mad_f32 v19, v16, v16, v19 ; D2820013 044E2110 v_mad_f32 v19, v15, v15, v19 ; D2820013 044E1F0F v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mul_f32_e32 v16, v19, v16 ; 10202113 s_buffer_load_dword s20, s[4:7], 0x2 ; C20A0502 v_mov_b32_e32 v20, s0 ; 7E280200 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v18, -v18, s20, v20 ; D2820012 24502912 v_mul_f32_e32 v16, v16, v18 ; 10202510 buffer_load_format_xyzw v[20:23], v0, s[16:19], 0 idxen ; E00C2000 80041400 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v21, s8, v16 ; D2820000 04401115 v_mul_f32_e32 v4, v19, v4 ; 10080913 v_mov_b32_e32 v16, s0 ; 7E200200 v_mad_f32 v13, -v13, s20, v16 ; D282000D 2440290D v_mul_f32_e32 v4, v4, v13 ; 10081B04 v_mad_f32 v4, v20, s8, v4 ; D2820004 04101114 v_mul_f32_e32 v13, s9, v4 ; 101A0809 v_mad_f32 v13, v0, s1, v13 ; D282000D 04340300 v_mul_f32_e32 v15, v19, v15 ; 101E1F13 v_mov_b32_e32 v16, s0 ; 7E200200 v_mad_f32 v5, -v5, s20, v16 ; D2820005 24402905 v_mul_f32_e32 v5, v15, v5 ; 100A0B0F v_mad_f32 v5, v22, s8, v5 ; D2820005 04141116 v_mad_f32 v13, v5, s10, v13 ; D282000D 04341505 v_mul_f32_e32 v15, s14, v4 ; 101E080E v_mad_f32 v15, v0, s12, v15 ; D282000F 043C1900 v_mad_f32 v15, v5, s13, v15 ; D282000F 043C1B05 v_mul_f32_e32 v16, v15, v15 ; 10201F0F v_mad_f32 v16, v13, v13, v16 ; D2820010 04421B0D v_mul_f32_e32 v4, s24, v4 ; 10080818 v_mad_f32 v0, v0, s15, v4 ; D2820000 04101F00 v_mad_f32 v0, v5, s25, v0 ; D2820000 04003305 v_mad_f32 v4, v0, v0, v16 ; D2820004 04420100 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mul_f32_e32 v5, v4, v0 ; 100A0104 v_mul_f32_e32 v16, v4, v13 ; 10201B04 v_mul_f32_e32 v4, v4, v15 ; 10081F04 s_buffer_load_dword s16, s[4:7], 0x21 ; C2080521 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v18, s16, v6 ; 10240C10 s_buffer_load_dword s16, s[4:7], 0x20 ; C2080520 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v18, v8, s16, v18 ; D2820012 04482108 s_buffer_load_dword s16, s[4:7], 0x22 ; C2080522 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v18, v1, s16, v18 ; D2820012 04482101 s_buffer_load_dword s16, s[4:7], 0x23 ; C2080523 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v19, s16 ; 7E260210 v_mad_f32 v18, v19, s0, v18 ; D2820012 04480113 s_buffer_load_dword s16, s[4:7], 0x2d ; C208052D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v19, s16, v6 ; 10260C10 s_buffer_load_dword s16, s[4:7], 0x2c ; C208052C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v19, v8, s16, v19 ; D2820013 044C2108 s_buffer_load_dword s16, s[4:7], 0x2e ; C208052E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v19, v1, s16, v19 ; D2820013 044C2101 s_buffer_load_dword s16, s[4:7], 0x2f ; C208052F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v24, s16 ; 7E300210 v_mad_f32 v19, v24, s0, v19 ; D2820013 044C0118 v_rcp_f32_e32 v24, v19 ; 7E305513 v_mul_f32_e32 v25, v24, v18 ; 10322518 exp 15, 37, 0, 0, 0, v4, v16, v5, v25 ; F800025F 19051004 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v4, v3, v9 ; 06081303 v_cmp_lt_f32_e64 s[16:17], v4, s11 ; D0020010 00001704 v_cndmask_b32_e64 v5, 0, -1, s[16:17] ; D2000805 00418280 v_and_b32_e32 v5, 1.0, v5 ; 360A0AF2 v_sub_f32_e64 v4, |v4|, v5 ; D2080104 00020B04 v_add_f32_e32 v4, v7, v4 ; 06080907 v_cmp_lt_f32_e64 s[16:17], v4, s11 ; D0020010 00001704 v_cndmask_b32_e64 v16, 0, -1, s[16:17] ; D2000010 00418280 v_and_b32_e32 v16, 1.0, v16 ; 362020F2 v_sub_f32_e64 v4, |v4|, v16 ; D2080104 00022104 v_mad_f32 v25, v4, v14, 1.0 ; D2820019 03CA1D04 v_add_f32_e32 v3, v3, v10 ; 06061503 v_cmp_lt_f32_e64 s[16:17], v3, s11 ; D0020010 00001703 v_cndmask_b32_e64 v9, 0, -1, s[16:17] ; D2000809 00418280 v_and_b32_e32 v9, 1.0, v9 ; 361212F2 v_sub_f32_e64 v3, |v3|, v9 ; D2080103 00021303 v_add_f32_e32 v3, v7, v3 ; 06060707 v_cmp_lt_f32_e64 s[16:17], v3, s11 ; D0020010 00001703 v_cndmask_b32_e64 v7, 0, -1, s[16:17] ; D2000807 00418280 v_and_b32_e32 v7, 1.0, v7 ; 360E0EF2 v_sub_f32_e64 v3, |v3|, v7 ; D2080103 00020F03 v_mad_f32 v9, v3, v14, v25 ; D2820009 04661D03 v_mul_f32_e32 v3, 0x3c820821, v3 ; 100606FF 3C820821 v_mul_f32_e32 v4, 0x3c820821, v4 ; 100808FF 3C820821 v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mad_f32 v10, v3, v3, v10 ; D282000A 042A0703 v_mad_f32 v10, v9, v9, v10 ; D282000A 042A1309 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v3, v10, v3 ; 1006070A v_mov_b32_e32 v11, s0 ; 7E160200 v_mad_f32 v7, -v7, s20, v11 ; D2820007 242C2907 v_mul_f32_e32 v3, v3, v7 ; 10060F03 v_mad_f32 v3, v21, s8, v3 ; D2820003 040C1115 v_mul_f32_e32 v4, v10, v4 ; 1008090A v_mov_b32_e32 v7, s0 ; 7E0E0200 v_mad_f32 v7, -v16, s20, v7 ; D2820007 241C2910 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mad_f32 v4, v20, s8, v4 ; D2820004 04101114 v_mul_f32_e32 v7, s14, v4 ; 100E080E v_mad_f32 v7, v3, s12, v7 ; D2820007 041C1903 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mov_b32_e32 v10, s0 ; 7E140200 v_mad_f32 v5, -v5, s20, v10 ; D2820005 24282905 v_mul_f32_e32 v5, v9, v5 ; 100A0B09 v_mad_f32 v5, v22, s8, v5 ; D2820005 04141116 v_mad_f32 v7, v5, s13, v7 ; D2820007 041C1B05 v_mul_f32_e32 v9, v0, v7 ; 10120F00 v_mul_f32_e32 v10, s24, v4 ; 10140818 v_mad_f32 v10, v3, s15, v10 ; D282000A 04281F03 v_mad_f32 v10, v5, s25, v10 ; D282000A 04283305 v_mad_f32 v9, v10, v15, -v9 ; D2820009 84261F0A v_mov_b32_e32 v11, s0 ; 7E160200 v_mad_f32 v11, -v17, s20, v11 ; D282000B 242C2911 v_mul_f32_e32 v9, v9, v11 ; 10121709 v_mul_f32_e32 v12, v13, v10 ; 1018150D v_mul_f32_e32 v4, s9, v4 ; 10080809 v_mad_f32 v3, v3, s1, v4 ; D2820003 04100303 v_mad_f32 v3, v5, s10, v3 ; D2820003 040C1505 v_mad_f32 v0, v3, v0, -v12 ; D2820000 84320103 v_mul_f32_e32 v0, v0, v11 ; 10001700 v_mul_f32_e32 v4, v0, v0 ; 10080100 v_mad_f32 v4, v9, v9, v4 ; D2820004 04121309 v_mul_f32_e32 v5, v15, v3 ; 100A070F v_mad_f32 v5, v7, v13, -v5 ; D2820005 84161B07 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mad_f32 v4, v5, v5, v4 ; D2820004 04120B05 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mul_f32_e32 v0, v4, v0 ; 10000104 s_buffer_load_dword s1, s[4:7], 0x25 ; C2008525 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s1, v6 ; 10080C01 s_buffer_load_dword s1, s[4:7], 0x24 ; C2008524 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v8, s1, v4 ; D2820004 04100308 s_buffer_load_dword s1, s[4:7], 0x26 ; C2008526 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v1, s1, v4 ; D2820004 04100301 s_buffer_load_dword s1, s[4:7], 0x27 ; C2008527 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v11, s1 ; 7E160201 v_mad_f32 v4, v11, s0, v4 ; D2820004 0410010B v_mul_f32_e32 v11, v24, v4 ; 10160918 exp 15, 38, 0, 0, 0, v0, v9, v5, v11 ; F800026F 0B050900 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v7, v7 ; 10000F07 v_mad_f32 v0, v3, v3, v0 ; D2820000 04020703 v_mad_f32 v0, v10, v10, v0 ; D2820000 0402150A v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v5, v0, v10 ; 100A1500 v_mul_f32_e32 v3, v0, v3 ; 10060700 v_mul_f32_e32 v0, v0, v7 ; 10000F00 exp 15, 39, 0, 0, 0, v0, v3, v5, v2 ; F800027F 02050300 s_buffer_load_dword s1, s[4:7], 0xe1 ; C20085E1 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s1, v6 ; 10000C01 s_buffer_load_dword s1, s[4:7], 0xe0 ; C20085E0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v8, s1, v0 ; D2820000 04000308 s_buffer_load_dword s1, s[4:7], 0xe2 ; C20085E2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s1, v0 ; D2820000 04000301 s_buffer_load_dword s1, s[4:7], 0xe3 ; C20085E3 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s1 ; 7E040201 v_mad_f32 v0, v2, s0, v0 ; D2820000 04000102 s_buffer_load_dword s1, s[4:7], 0xe5 ; C20085E5 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s1, v6 ; 10040C01 s_buffer_load_dword s1, s[4:7], 0xe4 ; C20085E4 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v8, s1, v2 ; D2820002 04080308 s_buffer_load_dword s1, s[4:7], 0xe6 ; C20085E6 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v1, s1, v2 ; D2820002 04080301 s_buffer_load_dword s1, s[4:7], 0xe7 ; C20085E7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s1 ; 7E060201 v_mad_f32 v2, v3, s0, v2 ; D2820002 04080103 v_rcp_f32_e32 v3, v2 ; 7E065502 v_mul_f32_e32 v0, v3, v0 ; 10000103 s_buffer_load_dword s1, s[4:7], 0xdd ; C20085DD s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s1, v6 ; 100A0C01 s_buffer_load_dword s1, s[4:7], 0xdc ; C20085DC s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v8, s1, v5 ; D2820005 04140308 s_buffer_load_dword s1, s[4:7], 0xde ; C20085DE s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s1, v5 ; D2820005 04140301 s_buffer_load_dword s1, s[4:7], 0xdf ; C20085DF s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s1 ; 7E0E0201 v_mad_f32 v5, v7, s0, v5 ; D2820005 04140107 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 s_buffer_load_dword s1, s[4:7], 0xd9 ; C20085D9 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s1, v6 ; 100E0C01 s_buffer_load_dword s1, s[4:7], 0xd8 ; C20085D8 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v8, s1, v7 ; D2820007 041C0308 s_buffer_load_dword s1, s[4:7], 0xda ; C20085DA s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v1, s1, v7 ; D2820007 041C0301 s_buffer_load_dword s1, s[4:7], 0xdb ; C20085DB s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v9, s1 ; 7E120201 v_mad_f32 v7, v9, s0, v7 ; D2820007 041C0109 v_mul_f32_e32 v3, v3, v7 ; 10060F03 exp 15, 40, 0, 0, 0, v3, v5, v0, v2 ; F800028F 02000503 s_buffer_load_dword s1, s[4:7], 0x29 ; C2008529 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s1, v6 ; 10000C01 s_buffer_load_dword s1, s[4:7], 0x28 ; C2008528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v8, s1, v0 ; D2820000 04000308 s_buffer_load_dword s1, s[4:7], 0x2a ; C200852A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s1, v0 ; D2820000 04000301 s_buffer_load_dword s1, s[4:7], 0x2b ; C200852B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s1 ; 7E020201 v_mad_f32 v0, v1, s0, v0 ; D2820000 04000101 v_mad_f32 v1, v0, s20, -v19 ; D2820001 844C2900 s_movk_i32 s0, 0x464 ; B0000464 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s0, v19, -v4 ; D2820002 84122600 s_movk_i32 s0, 0x460 ; B0000460 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s0, v19, v18 ; D2820003 044A2600 exp 15, 12, 0, 0, 0, v3, v2, v1, v19 ; F80000CF 13010203 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v4 ; 10020804 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v18, v1 ; D2820001 04062404 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v0, v1 ; D2820001 04060004 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v19, v1 ; D2820001 04062604 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v4 ; 10040804 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v18, v2 ; D2820002 040A2404 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v0, v2 ; D2820002 040A0004 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v19, v2 ; D2820002 040A2604 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v18, v3 ; D2820003 040E2404 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v19, v3 ; D2820003 040E2604 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v18, v5 ; D2820005 04162404 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v19, v5 ; D2820005 04162604 exp 15, 13, 0, 0, 0, v5, v3, v2, v1 ; F80000DF 01020305 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s4, v4 ; 10020804 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v18, v1 ; D2820001 04062404 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v0, v1 ; D2820001 04060004 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v19, v1 ; D2820001 04062604 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v4 ; 10040804 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v18, v2 ; D2820002 040A2404 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v0, v2 ; D2820002 040A0004 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v19, v2 ; D2820002 040A2604 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v18, v3 ; D2820003 040E2404 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v19, v3 ; D2820003 040E2604 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v18, v4 ; D2820004 04122404 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v0, v4 ; D2820000 04120004 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v19, v0 ; D2820000 04022600 exp 15, 14, 0, 1, 0, v0, v3, v2, v1 ; F80008EF 01020300 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL IN[7], GENERIC[16], PERSPECTIVE DCL IN[8], GENERIC[17], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL CONST[0..30] DCL TEMP[0..11], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -0.0000, 0.0010} IMM[1] FLT32 { -0.0010, 0.0010, 0.0000, 1.0000} IMM[2] FLT32 { 0.0749, 0.1236, 0.2060, -20000.0000} IMM[3] FLT32 { 2.0000, -1.0000, 0.5000, -0.0100} IMM[4] FLT32 { 0.0100, 0.0000, 0.0000, 0.0000} 0: ABS TEMP[0].x, CONST[4].wwww 1: MUL TEMP[1].x, CONST[29].wwww, IN[4].wwww 2: MOV TEMP[2].xyz, TEMP[1] 3: FSGE TEMP[3].x, -TEMP[0].xxxx, IMM[0].xxxx 4: UIF TEMP[3].xxxx :0 5: MOV TEMP[3].x, IMM[0].yyyy 6: ELSE :0 7: MOV TEMP[3].x, TEMP[1].xxxx 8: ENDIF 9: MOV TEMP[2].w, TEMP[3].xxxx 10: MOV TEMP[1].w, TEMP[2] 11: MUL TEMP[0], IMM[0].yyyz, IN[8].xyzx 12: ADD TEMP[3].xyz, TEMP[0], IMM[0].wwxy 13: MOV TEMP[4].xy, TEMP[3].xyyy 14: MOV TEMP[4].z, TEMP[3].zzzz 15: TEX TEMP[4].x, TEMP[4], SAMP[7], SHADOW2D 16: MOV TEMP[3].x, TEMP[4].xxxx 17: ADD TEMP[4].xyz, TEMP[0], IMM[1] 18: MOV TEMP[5].xy, TEMP[4].xyyy 19: MOV TEMP[5].z, TEMP[4].zzzz 20: TEX TEMP[5].x, TEMP[5], SAMP[7], SHADOW2D 21: MOV TEMP[3].y, TEMP[5].xxxx 22: ADD TEMP[4].xyz, TEMP[0], IMM[1].yxzw 23: MOV TEMP[5].xy, TEMP[4].xyyy 24: MOV TEMP[5].z, TEMP[4].zzzz 25: TEX TEMP[5].x, TEMP[5], SAMP[7], SHADOW2D 26: MOV TEMP[3].z, TEMP[5].xxxx 27: ADD TEMP[4].xyz, TEMP[0], IMM[1].xxzw 28: MOV TEMP[5].xy, TEMP[4].xyyy 29: MOV TEMP[5].z, TEMP[4].zzzz 30: TEX TEMP[5].x, TEMP[5], SAMP[7], SHADOW2D 31: MOV TEMP[3].w, TEMP[5].xxxx 32: DP4 TEMP[5].x, TEMP[3], IMM[2].xxxx 33: ADD TEMP[4].xyz, TEMP[0], IMM[0].wxxy 34: MOV TEMP[6].xy, TEMP[4].xyyy 35: MOV TEMP[6].z, TEMP[4].zzzz 36: TEX TEMP[6].x, TEMP[6], SAMP[7], SHADOW2D 37: MOV TEMP[4].x, TEMP[6].xxxx 38: ADD TEMP[6].xyz, TEMP[0], IMM[1].xzzw 39: MOV TEMP[7].xy, TEMP[6].xyyy 40: MOV TEMP[7].z, TEMP[6].zzzz 41: TEX TEMP[7].x, TEMP[7], SAMP[7], SHADOW2D 42: MOV TEMP[4].y, TEMP[7].xxxx 43: ADD TEMP[6].xyz, TEMP[0], IMM[1].zxzw 44: MOV TEMP[7].xy, TEMP[6].xyyy 45: MOV TEMP[7].z, TEMP[6].zzzz 46: TEX TEMP[7].x, TEMP[7], SAMP[7], SHADOW2D 47: MOV TEMP[4].z, TEMP[7].xxxx 48: ADD TEMP[6].xyz, TEMP[0], IMM[0].xwxy 49: ADD TEMP[0].xyz, TEMP[0], IMM[0].zzzy 50: MOV TEMP[7].xy, TEMP[6].xyyy 51: MOV TEMP[7].z, TEMP[6].zzzz 52: TEX TEMP[7].x, TEMP[7], SAMP[7], SHADOW2D 53: MOV TEMP[4].w, TEMP[7].xxxx 54: DP4 TEMP[7].x, TEMP[4], IMM[2].yyyy 55: ADD TEMP[3].x, TEMP[5].xxxx, TEMP[7].xxxx 56: MOV TEMP[5].xy, TEMP[0].xyyy 57: MOV TEMP[5].z, TEMP[0].zzzz 58: TEX TEMP[5].x, TEMP[5], SAMP[7], SHADOW2D 59: MAD TEMP[0].x, TEMP[5].xxxx, IMM[2].zzzz, TEMP[3].xxxx 60: ADD TEMP[5].x, IMM[2].wwww, IN[4].zzzz 61: RCP TEMP[7].x, CONST[7].zzzz 62: MUL TEMP[5].x, -TEMP[5].xxxx, TEMP[7].xxxx 63: MAD TEMP[5].xy, CONST[7].xyyy, TEMP[5].xxxx, IN[4].xyyy 64: RCP TEMP[7].x, CONST[22].wwww 65: MAD TEMP[3].xy, TEMP[5].xyyy, TEMP[7].xxxx, CONST[6].xyyy 66: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[7].xxxx, CONST[6].zwww 67: MOV TEMP[7].xy, TEMP[3].xyyy 68: TEX TEMP[7].x, TEMP[7], SAMP[6], 2D 69: MOV TEMP[5].xy, TEMP[5].xyyy 70: TEX TEMP[5].y, TEMP[5], SAMP[6], 2D 71: MUL TEMP[8].x, TEMP[7].xxxx, TEMP[5].yyyy 72: MAD TEMP[5].x, TEMP[7].xxxx, -TEMP[5].yyyy, IMM[0].yyyy 73: MUL TEMP[3].xyz, TEMP[8].xxxx, CONST[28].xyzz 74: MUL TEMP[3].xyz, TEMP[0].xxxx, TEMP[3].xyzz 75: ADD TEMP[7].x, -TEMP[0].xxxx, IMM[0].yyyy 76: MAX TEMP[5].x, TEMP[7].xxxx, TEMP[5].xxxx 77: MUL TEMP[5].xyz, TEMP[5].xxxx, CONST[21].xyzz 78: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[1].xxxx 79: MOV TEMP[7].xy, IN[0].xyyy 80: TEX TEMP[7].xyz, TEMP[7], SAMP[1], 2D 81: MAD TEMP[4].xyz, TEMP[7].xyzz, IMM[3].xxxx, IMM[3].yyyy 82: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz 83: RSQ TEMP[7].x, TEMP[7].xxxx 84: MUL TEMP[7].xyz, TEMP[4].xyzz, TEMP[7].xxxx 85: MUL TEMP[4].xyz, TEMP[7].yyyy, IN[6].xyzz 86: MAD TEMP[4].xyz, TEMP[7].xxxx, IN[5].xyzz, TEMP[4].xyzz 87: MAD TEMP[4].xyz, TEMP[7].zzzz, IN[7].xyzz, TEMP[4].xyzz 88: DP3 TEMP[7].x, TEMP[4].xyzz, TEMP[4].xyzz 89: RSQ TEMP[7].x, TEMP[7].xxxx 90: MUL TEMP[7].xyz, TEMP[4].xyzz, TEMP[7].xxxx 91: DP3 TEMP[8].x, CONST[22].xyzz, TEMP[7].xyzz 92: MOV_SAT TEMP[8].x, TEMP[8].xxxx 93: MOV TEMP[3].w, TEMP[8].xxxx 94: MUL TEMP[4].xyz, TEMP[8].xxxx, CONST[20].xyzz 95: MAD TEMP[5].xyz, TEMP[4].xyzz, IN[1].xxxx, TEMP[5].xyzz 96: MOV TEMP[2].xyz, TEMP[3] 97: FSGE TEMP[8].x, -IN[8].wwww, IMM[0].xxxx 98: UIF TEMP[8].xxxx :0 99: MOV TEMP[8].x, IMM[0].zzzz 100: ELSE :0 101: MOV TEMP[8].x, IMM[0].yyyy 102: ENDIF 103: MOV TEMP[3].xyz, TEMP[3].xyzx 104: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[8].xxxx 105: MOV TEMP[8].xyz, -CONST[7].xyzx 106: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz 107: RSQ TEMP[9].x, TEMP[9].xxxx 108: MUL TEMP[4].xyz, TEMP[8].xyzz, TEMP[9].xxxx 109: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[4].xyzz 110: MAD TEMP[9].x, TEMP[8].xxxx, IMM[3].zzzz, IMM[3].zzzz 111: MOV_SAT TEMP[8].x, TEMP[8].xxxx 112: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[9].xxxx 113: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[2].xyzz, TEMP[5].xyzz 114: DP3 TEMP[2].x, IN[2].xyzz, IN[2].xyzz 115: RSQ TEMP[2].x, TEMP[2].xxxx 116: MUL TEMP[2].xyz, IN[2].xyzz, TEMP[2].xxxx 117: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[7].xyzz 118: ADD TEMP[10].x, TEMP[5].xxxx, TEMP[5].xxxx 119: MOV TEMP[9].x, TEMP[5].xxxx 120: MOV_SAT TEMP[9].x, TEMP[5].xxxx 121: MAD TEMP[5].xyz, TEMP[7].xyzz, -TEMP[10].xxxx, TEMP[2].xyzz 122: MAX TEMP[7].x, TEMP[7].zzzz, IMM[0].zzzz 123: DP3 TEMP[4].x, TEMP[4].xyzz, -TEMP[5].xyzz 124: MOV_SAT TEMP[5].x, TEMP[4].xxxx 125: MOV TEMP[4].x, TEMP[5].xxxx 126: ADD TEMP[10].x, TEMP[5].xxxx, IMM[3].wwww 127: FSGE TEMP[10].x, TEMP[10].xxxx, IMM[0].xxxx 128: UIF TEMP[10].xxxx :0 129: MOV TEMP[5].x, TEMP[5].xxxx 130: ELSE :0 131: MOV TEMP[5].x, IMM[4].xxxx 132: ENDIF 133: MOV_SAT TEMP[4].x, TEMP[5].xxxx 134: MOV TEMP[5].xy, IN[0].xyyy 135: TEX TEMP[5], TEMP[5], SAMP[4], 2D 136: MAX TEMP[5], TEMP[5], CONST[27] 137: MUL TEMP[10].x, TEMP[5].wwww, CONST[1].xxxx 138: POW TEMP[10].x, TEMP[4].xxxx, TEMP[10].xxxx 139: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx 140: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[8].xxxx 141: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[0].wwww 142: MUL TEMP[3].xyz, TEMP[5].xxxx, TEMP[3].xyzz 143: MOV TEMP[8].xy, IN[0].xyyy 144: TEX TEMP[8].xyz, TEMP[8], SAMP[0], 2D 145: LRP TEMP[6].xyz, TEMP[5].zzzz, CONST[0].xyzz, TEMP[8].xyzz 146: MUL TEMP[2].xyz, TEMP[5].yyyy, IN[3].xyzz 147: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xyzz 148: MOV TEMP[9].y, IMM[3].zzzz 149: MOV TEMP[5].xy, TEMP[9].xyyy 150: TEX TEMP[5].xz, TEMP[5], SAMP[5], 2D 151: MOV TEMP[6].xy, IN[0].xyyy 152: TEX TEMP[6].zw, TEMP[6], SAMP[3], 2D 153: MAX TEMP[6].xy, TEMP[6].wzzz, CONST[26].wzzz 154: MAX TEMP[10].x, TEMP[5].zzzz, TEMP[6].yyyy 155: MUL TEMP[9].xyz, TEMP[3].xyzz, TEMP[10].xxxx 156: MOV TEMP[11].xy, IN[0].zwww 157: TEX TEMP[11].xyz, TEMP[11], SAMP[2], 2D 158: MAD TEMP[4].xyz, TEMP[11].xyzz, CONST[3].wwww, TEMP[8].xyzz 159: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xyzz, TEMP[9].xyzz 160: MAD TEMP[3].xyz, TEMP[3].xyzz, TEMP[10].xxxx, -TEMP[0].xyzz 161: MAD TEMP[0].xyz, TEMP[6].yyyy, TEMP[3].xyzz, TEMP[0].xyzz 162: MOV_SAT TEMP[6].x, TEMP[6].xxxx 163: MUL TEMP[3].xyz, TEMP[7].xxxx, TEMP[2].xyzz 164: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[5].xxxx, TEMP[0].xyzz 165: LRP TEMP[3].xyz, TEMP[6].xxxx, TEMP[4].xyzz, TEMP[0].xyzz 166: MUL TEMP[0].xyz, TEMP[3].xyzz, CONST[30].xxxx 167: MAD TEMP[3].xyz, TEMP[3].xyzz, -CONST[30].xxxx, CONST[29].xyzz 168: MUL TEMP[2].x, IN[3].wwww, IN[3].wwww 169: MAD TEMP[1].xyz, TEMP[2].xxxx, TEMP[3].xyzz, TEMP[0].xyzz 170: MOV OUT[0], TEMP[1] 171: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 364) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 424) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 428) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 432) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 436) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 440) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 444) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 448) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 452) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 456) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 464) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 468) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 472) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 476) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 480) %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %79 = load <8 x i32> addrspace(2)* %78, !tbaa !0 %80 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %81 = load <4 x i32> addrspace(2)* %80, !tbaa !0 %82 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %83 = load <8 x i32> addrspace(2)* %82, !tbaa !0 %84 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %85 = load <4 x i32> addrspace(2)* %84, !tbaa !0 %86 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %87 = load <8 x i32> addrspace(2)* %86, !tbaa !0 %88 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %89 = load <4 x i32> addrspace(2)* %88, !tbaa !0 %90 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %91 = load <8 x i32> addrspace(2)* %90, !tbaa !0 %92 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %93 = load <4 x i32> addrspace(2)* %92, !tbaa !0 %94 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 8, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 8, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 2, i32 8, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 3, i32 8, i32 %5, <2 x i32> %7) %123 = call float @fabs(float %30) %124 = fmul float %60, %109 %125 = fsub float -0.000000e+00, %123 %126 = fcmp oge float %125, 0.000000e+00 %127 = sext i1 %126 to i32 %128 = bitcast i32 %127 to float %129 = bitcast float %128 to i32 %130 = icmp ne i32 %129, 0 %. = select i1 %130, float 1.000000e+00, float %124 %131 = fmul float 1.000000e+00, %119 %132 = fmul float 1.000000e+00, %120 %133 = fmul float 1.000000e+00, %121 %134 = fadd float %131, 9.765625e-04 %135 = fadd float %132, 9.765625e-04 %136 = fadd float %133, 0.000000e+00 %137 = bitcast float %136 to i32 %138 = bitcast float %134 to i32 %139 = bitcast float %135 to i32 %140 = insertelement <4 x i32> undef, i32 %137, i32 0 %141 = insertelement <4 x i32> %140, i32 %138, i32 1 %142 = insertelement <4 x i32> %141, i32 %139, i32 2 %143 = insertelement <4 x i32> %142, i32 undef, i32 3 %144 = bitcast <8 x i32> %91 to <32 x i8> %145 = bitcast <4 x i32> %93 to <16 x i8> %146 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %143, <32 x i8> %144, <16 x i8> %145, i32 7) %147 = extractelement <4 x float> %146, i32 0 %148 = fadd float %131, -9.765625e-04 %149 = fadd float %132, 9.765625e-04 %150 = fadd float %133, 0.000000e+00 %151 = bitcast float %150 to i32 %152 = bitcast float %148 to i32 %153 = bitcast float %149 to i32 %154 = insertelement <4 x i32> undef, i32 %151, i32 0 %155 = insertelement <4 x i32> %154, i32 %152, i32 1 %156 = insertelement <4 x i32> %155, i32 %153, i32 2 %157 = insertelement <4 x i32> %156, i32 undef, i32 3 %158 = bitcast <8 x i32> %91 to <32 x i8> %159 = bitcast <4 x i32> %93 to <16 x i8> %160 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %157, <32 x i8> %158, <16 x i8> %159, i32 7) %161 = extractelement <4 x float> %160, i32 0 %162 = fadd float %131, 9.765625e-04 %163 = fadd float %132, -9.765625e-04 %164 = fadd float %133, 0.000000e+00 %165 = bitcast float %164 to i32 %166 = bitcast float %162 to i32 %167 = bitcast float %163 to i32 %168 = insertelement <4 x i32> undef, i32 %165, i32 0 %169 = insertelement <4 x i32> %168, i32 %166, i32 1 %170 = insertelement <4 x i32> %169, i32 %167, i32 2 %171 = insertelement <4 x i32> %170, i32 undef, i32 3 %172 = bitcast <8 x i32> %91 to <32 x i8> %173 = bitcast <4 x i32> %93 to <16 x i8> %174 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %171, <32 x i8> %172, <16 x i8> %173, i32 7) %175 = extractelement <4 x float> %174, i32 0 %176 = fadd float %131, -9.765625e-04 %177 = fadd float %132, -9.765625e-04 %178 = fadd float %133, 0.000000e+00 %179 = bitcast float %178 to i32 %180 = bitcast float %176 to i32 %181 = bitcast float %177 to i32 %182 = insertelement <4 x i32> undef, i32 %179, i32 0 %183 = insertelement <4 x i32> %182, i32 %180, i32 1 %184 = insertelement <4 x i32> %183, i32 %181, i32 2 %185 = insertelement <4 x i32> %184, i32 undef, i32 3 %186 = bitcast <8 x i32> %91 to <32 x i8> %187 = bitcast <4 x i32> %93 to <16 x i8> %188 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %185, <32 x i8> %186, <16 x i8> %187, i32 7) %189 = extractelement <4 x float> %188, i32 0 %190 = fmul float %147, 0x3FB32D1040000000 %191 = fmul float %161, 0x3FB32D1040000000 %192 = fadd float %190, %191 %193 = fmul float %175, 0x3FB32D1040000000 %194 = fadd float %192, %193 %195 = fmul float %189, 0x3FB32D1040000000 %196 = fadd float %194, %195 %197 = fadd float %131, 9.765625e-04 %198 = fadd float %132, 0.000000e+00 %199 = fadd float %133, 0.000000e+00 %200 = bitcast float %199 to i32 %201 = bitcast float %197 to i32 %202 = bitcast float %198 to i32 %203 = insertelement <4 x i32> undef, i32 %200, i32 0 %204 = insertelement <4 x i32> %203, i32 %201, i32 1 %205 = insertelement <4 x i32> %204, i32 %202, i32 2 %206 = insertelement <4 x i32> %205, i32 undef, i32 3 %207 = bitcast <8 x i32> %91 to <32 x i8> %208 = bitcast <4 x i32> %93 to <16 x i8> %209 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %206, <32 x i8> %207, <16 x i8> %208, i32 7) %210 = extractelement <4 x float> %209, i32 0 %211 = fadd float %131, -9.765625e-04 %212 = fadd float %132, 0.000000e+00 %213 = fadd float %133, 0.000000e+00 %214 = bitcast float %213 to i32 %215 = bitcast float %211 to i32 %216 = bitcast float %212 to i32 %217 = insertelement <4 x i32> undef, i32 %214, i32 0 %218 = insertelement <4 x i32> %217, i32 %215, i32 1 %219 = insertelement <4 x i32> %218, i32 %216, i32 2 %220 = insertelement <4 x i32> %219, i32 undef, i32 3 %221 = bitcast <8 x i32> %91 to <32 x i8> %222 = bitcast <4 x i32> %93 to <16 x i8> %223 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %220, <32 x i8> %221, <16 x i8> %222, i32 7) %224 = extractelement <4 x float> %223, i32 0 %225 = fadd float %131, 0.000000e+00 %226 = fadd float %132, -9.765625e-04 %227 = fadd float %133, 0.000000e+00 %228 = bitcast float %227 to i32 %229 = bitcast float %225 to i32 %230 = bitcast float %226 to i32 %231 = insertelement <4 x i32> undef, i32 %228, i32 0 %232 = insertelement <4 x i32> %231, i32 %229, i32 1 %233 = insertelement <4 x i32> %232, i32 %230, i32 2 %234 = insertelement <4 x i32> %233, i32 undef, i32 3 %235 = bitcast <8 x i32> %91 to <32 x i8> %236 = bitcast <4 x i32> %93 to <16 x i8> %237 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %234, <32 x i8> %235, <16 x i8> %236, i32 7) %238 = extractelement <4 x float> %237, i32 0 %239 = fadd float %131, 0.000000e+00 %240 = fadd float %132, 9.765625e-04 %241 = fadd float %133, 0.000000e+00 %242 = fadd float %131, -0.000000e+00 %243 = fadd float %132, -0.000000e+00 %244 = fadd float %133, -0.000000e+00 %245 = bitcast float %241 to i32 %246 = bitcast float %239 to i32 %247 = bitcast float %240 to i32 %248 = insertelement <4 x i32> undef, i32 %245, i32 0 %249 = insertelement <4 x i32> %248, i32 %246, i32 1 %250 = insertelement <4 x i32> %249, i32 %247, i32 2 %251 = insertelement <4 x i32> %250, i32 undef, i32 3 %252 = bitcast <8 x i32> %91 to <32 x i8> %253 = bitcast <4 x i32> %93 to <16 x i8> %254 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %251, <32 x i8> %252, <16 x i8> %253, i32 7) %255 = extractelement <4 x float> %254, i32 0 %256 = fmul float %210, 0x3FBFA3F480000000 %257 = fmul float %224, 0x3FBFA3F480000000 %258 = fadd float %256, %257 %259 = fmul float %238, 0x3FBFA3F480000000 %260 = fadd float %258, %259 %261 = fmul float %255, 0x3FBFA3F480000000 %262 = fadd float %260, %261 %263 = fadd float %196, %262 %264 = bitcast float %244 to i32 %265 = bitcast float %242 to i32 %266 = bitcast float %243 to i32 %267 = insertelement <4 x i32> undef, i32 %264, i32 0 %268 = insertelement <4 x i32> %267, i32 %265, i32 1 %269 = insertelement <4 x i32> %268, i32 %266, i32 2 %270 = insertelement <4 x i32> %269, i32 undef, i32 3 %271 = bitcast <8 x i32> %91 to <32 x i8> %272 = bitcast <4 x i32> %93 to <16 x i8> %273 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %270, <32 x i8> %271, <16 x i8> %272, i32 7) %274 = extractelement <4 x float> %273, i32 0 %275 = fmul float %274, 0x3FCA5DF660000000 %276 = fadd float %275, %263 %277 = fadd float -2.000000e+04, %108 %278 = fdiv float 1.000000e+00, %37 %279 = fsub float -0.000000e+00, %277 %280 = fmul float %279, %278 %281 = fmul float %35, %280 %282 = fadd float %281, %106 %283 = fmul float %36, %280 %284 = fadd float %283, %107 %285 = fdiv float 1.000000e+00, %47 %286 = fmul float %282, %285 %287 = fadd float %286, %31 %288 = fmul float %284, %285 %289 = fadd float %288, %32 %290 = fmul float %282, %285 %291 = fadd float %290, %33 %292 = fmul float %284, %285 %293 = fadd float %292, %34 %294 = bitcast float %287 to i32 %295 = bitcast float %289 to i32 %296 = insertelement <2 x i32> undef, i32 %294, i32 0 %297 = insertelement <2 x i32> %296, i32 %295, i32 1 %298 = bitcast <8 x i32> %87 to <32 x i8> %299 = bitcast <4 x i32> %89 to <16 x i8> %300 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %297, <32 x i8> %298, <16 x i8> %299, i32 2) %301 = extractelement <4 x float> %300, i32 0 %302 = bitcast float %291 to i32 %303 = bitcast float %293 to i32 %304 = insertelement <2 x i32> undef, i32 %302, i32 0 %305 = insertelement <2 x i32> %304, i32 %303, i32 1 %306 = bitcast <8 x i32> %87 to <32 x i8> %307 = bitcast <4 x i32> %89 to <16 x i8> %308 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %305, <32 x i8> %306, <16 x i8> %307, i32 2) %309 = extractelement <4 x float> %308, i32 1 %310 = fmul float %301, %309 %311 = fsub float -0.000000e+00, %309 %312 = fmul float %301, %311 %313 = fadd float %312, 1.000000e+00 %314 = fmul float %310, %54 %315 = fmul float %310, %55 %316 = fmul float %310, %56 %317 = fmul float %276, %314 %318 = fmul float %276, %315 %319 = fmul float %276, %316 %320 = fsub float -0.000000e+00, %276 %321 = fadd float %320, 1.000000e+00 %322 = call float @llvm.maxnum.f32(float %321, float %313) %323 = fmul float %322, %41 %324 = fmul float %322, %42 %325 = fmul float %322, %43 %326 = fmul float %323, %98 %327 = fmul float %324, %98 %328 = fmul float %325, %98 %329 = bitcast float %94 to i32 %330 = bitcast float %95 to i32 %331 = insertelement <2 x i32> undef, i32 %329, i32 0 %332 = insertelement <2 x i32> %331, i32 %330, i32 1 %333 = bitcast <8 x i32> %67 to <32 x i8> %334 = bitcast <4 x i32> %69 to <16 x i8> %335 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %332, <32 x i8> %333, <16 x i8> %334, i32 2) %336 = extractelement <4 x float> %335, i32 0 %337 = extractelement <4 x float> %335, i32 1 %338 = extractelement <4 x float> %335, i32 2 %339 = fmul float %336, 2.000000e+00 %340 = fadd float %339, -1.000000e+00 %341 = fmul float %337, 2.000000e+00 %342 = fadd float %341, -1.000000e+00 %343 = fmul float %338, 2.000000e+00 %344 = fadd float %343, -1.000000e+00 %345 = fmul float %340, %340 %346 = fmul float %342, %342 %347 = fadd float %346, %345 %348 = fmul float %344, %344 %349 = fadd float %347, %348 %350 = call float @llvm.AMDGPU.rsq.clamped.f32(float %349) %351 = fmul float %340, %350 %352 = fmul float %342, %350 %353 = fmul float %344, %350 %354 = fmul float %352, %113 %355 = fmul float %352, %114 %356 = fmul float %352, %115 %357 = fmul float %351, %110 %358 = fadd float %357, %354 %359 = fmul float %351, %111 %360 = fadd float %359, %355 %361 = fmul float %351, %112 %362 = fadd float %361, %356 %363 = fmul float %353, %116 %364 = fadd float %363, %358 %365 = fmul float %353, %117 %366 = fadd float %365, %360 %367 = fmul float %353, %118 %368 = fadd float %367, %362 %369 = fmul float %364, %364 %370 = fmul float %366, %366 %371 = fadd float %370, %369 %372 = fmul float %368, %368 %373 = fadd float %371, %372 %374 = call float @llvm.AMDGPU.rsq.clamped.f32(float %373) %375 = fmul float %364, %374 %376 = fmul float %366, %374 %377 = fmul float %368, %374 %378 = fmul float %44, %375 %379 = fmul float %45, %376 %380 = fadd float %379, %378 %381 = fmul float %46, %377 %382 = fadd float %380, %381 %383 = call float @llvm.AMDIL.clamp.(float %382, float 0.000000e+00, float 1.000000e+00) %384 = fmul float %383, %38 %385 = fmul float %383, %39 %386 = fmul float %383, %40 %387 = fmul float %384, %98 %388 = fadd float %387, %326 %389 = fmul float %385, %98 %390 = fadd float %389, %327 %391 = fmul float %386, %98 %392 = fadd float %391, %328 %393 = fsub float -0.000000e+00, %122 %394 = fcmp oge float %393, 0.000000e+00 %395 = sext i1 %394 to i32 %396 = bitcast i32 %395 to float %397 = bitcast float %396 to i32 %398 = icmp ne i32 %397, 0 %temp32.0 = select i1 %398, float -0.000000e+00, float 1.000000e+00 %399 = fmul float %276, %temp32.0 %400 = fsub float -0.000000e+00, %35 %401 = fsub float -0.000000e+00, %36 %402 = fsub float -0.000000e+00, %37 %403 = fmul float %400, %400 %404 = fmul float %401, %401 %405 = fadd float %404, %403 %406 = fmul float %402, %402 %407 = fadd float %405, %406 %408 = call float @llvm.AMDGPU.rsq.clamped.f32(float %407) %409 = fmul float %400, %408 %410 = fmul float %401, %408 %411 = fmul float %402, %408 %412 = fmul float %375, %409 %413 = fmul float %376, %410 %414 = fadd float %413, %412 %415 = fmul float %377, %411 %416 = fadd float %414, %415 %417 = fmul float %416, 5.000000e-01 %418 = fadd float %417, 5.000000e-01 %419 = call float @llvm.AMDIL.clamp.(float %416, float 0.000000e+00, float 1.000000e+00) %420 = fmul float %399, %418 %421 = fmul float %420, %317 %422 = fadd float %421, %388 %423 = fmul float %420, %318 %424 = fadd float %423, %390 %425 = fmul float %420, %319 %426 = fadd float %425, %392 %427 = fmul float %99, %99 %428 = fmul float %100, %100 %429 = fadd float %428, %427 %430 = fmul float %101, %101 %431 = fadd float %429, %430 %432 = call float @llvm.AMDGPU.rsq.clamped.f32(float %431) %433 = fmul float %99, %432 %434 = fmul float %100, %432 %435 = fmul float %101, %432 %436 = fmul float %433, %375 %437 = fmul float %434, %376 %438 = fadd float %437, %436 %439 = fmul float %435, %377 %440 = fadd float %438, %439 %441 = fadd float %440, %440 %442 = call float @llvm.AMDIL.clamp.(float %440, float 0.000000e+00, float 1.000000e+00) %443 = fsub float -0.000000e+00, %441 %444 = fmul float %375, %443 %445 = fadd float %444, %433 %446 = fsub float -0.000000e+00, %441 %447 = fmul float %376, %446 %448 = fadd float %447, %434 %449 = fsub float -0.000000e+00, %441 %450 = fmul float %377, %449 %451 = fadd float %450, %435 %452 = call float @llvm.maxnum.f32(float %377, float -0.000000e+00) %453 = fsub float -0.000000e+00, %445 %454 = fsub float -0.000000e+00, %448 %455 = fsub float -0.000000e+00, %451 %456 = fmul float %409, %453 %457 = fmul float %410, %454 %458 = fadd float %457, %456 %459 = fmul float %411, %455 %460 = fadd float %458, %459 %461 = call float @llvm.AMDIL.clamp.(float %460, float 0.000000e+00, float 1.000000e+00) %462 = fadd float %461, 0xBF847AE140000000 %463 = fcmp oge float %462, 0.000000e+00 %464 = sext i1 %463 to i32 %465 = bitcast i32 %464 to float %466 = bitcast float %465 to i32 %467 = icmp ne i32 %466, 0 %.54 = select i1 %467, float %461, float 0x3F847AE140000000 %468 = call float @llvm.AMDIL.clamp.(float %.54, float 0.000000e+00, float 1.000000e+00) %469 = bitcast float %94 to i32 %470 = bitcast float %95 to i32 %471 = insertelement <2 x i32> undef, i32 %469, i32 0 %472 = insertelement <2 x i32> %471, i32 %470, i32 1 %473 = bitcast <8 x i32> %79 to <32 x i8> %474 = bitcast <4 x i32> %81 to <16 x i8> %475 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %472, <32 x i8> %473, <16 x i8> %474, i32 2) %476 = extractelement <4 x float> %475, i32 0 %477 = extractelement <4 x float> %475, i32 1 %478 = extractelement <4 x float> %475, i32 2 %479 = extractelement <4 x float> %475, i32 3 %480 = call float @llvm.maxnum.f32(float %476, float %50) %481 = call float @llvm.maxnum.f32(float %477, float %51) %482 = call float @llvm.maxnum.f32(float %478, float %52) %483 = call float @llvm.maxnum.f32(float %479, float %53) %484 = fmul float %483, %28 %485 = call float @llvm.pow.f32(float %468, float %484) %486 = fmul float %419, %485 %487 = fmul float %317, %486 %488 = fmul float %318, %486 %489 = fmul float %319, %486 %490 = fmul float %487, %27 %491 = fmul float %488, %27 %492 = fmul float %489, %27 %493 = fmul float %480, %490 %494 = fmul float %480, %491 %495 = fmul float %480, %492 %496 = bitcast float %94 to i32 %497 = bitcast float %95 to i32 %498 = insertelement <2 x i32> undef, i32 %496, i32 0 %499 = insertelement <2 x i32> %498, i32 %497, i32 1 %500 = bitcast <8 x i32> %63 to <32 x i8> %501 = bitcast <4 x i32> %65 to <16 x i8> %502 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %499, <32 x i8> %500, <16 x i8> %501, i32 2) %503 = extractelement <4 x float> %502, i32 0 %504 = extractelement <4 x float> %502, i32 1 %505 = extractelement <4 x float> %502, i32 2 %506 = call float @llvm.AMDGPU.lrp(float %482, float %24, float %503) %507 = call float @llvm.AMDGPU.lrp(float %482, float %25, float %504) %508 = call float @llvm.AMDGPU.lrp(float %482, float %26, float %505) %509 = fmul float %481, %102 %510 = fmul float %481, %103 %511 = fmul float %481, %104 %512 = fmul float %493, %506 %513 = fmul float %494, %507 %514 = fmul float %495, %508 %515 = bitcast float %442 to i32 %516 = bitcast float 5.000000e-01 to i32 %517 = insertelement <2 x i32> undef, i32 %515, i32 0 %518 = insertelement <2 x i32> %517, i32 %516, i32 1 %519 = bitcast <8 x i32> %83 to <32 x i8> %520 = bitcast <4 x i32> %85 to <16 x i8> %521 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %518, <32 x i8> %519, <16 x i8> %520, i32 2) %522 = extractelement <4 x float> %521, i32 0 %523 = extractelement <4 x float> %521, i32 2 %524 = bitcast float %94 to i32 %525 = bitcast float %95 to i32 %526 = insertelement <2 x i32> undef, i32 %524, i32 0 %527 = insertelement <2 x i32> %526, i32 %525, i32 1 %528 = bitcast <8 x i32> %75 to <32 x i8> %529 = bitcast <4 x i32> %77 to <16 x i8> %530 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %527, <32 x i8> %528, <16 x i8> %529, i32 2) %531 = extractelement <4 x float> %530, i32 2 %532 = extractelement <4 x float> %530, i32 3 %533 = call float @llvm.maxnum.f32(float %532, float %49) %534 = call float @llvm.maxnum.f32(float %531, float %48) %535 = call float @llvm.maxnum.f32(float %523, float %534) %536 = fmul float %512, %535 %537 = fmul float %513, %535 %538 = fmul float %514, %535 %539 = bitcast float %96 to i32 %540 = bitcast float %97 to i32 %541 = insertelement <2 x i32> undef, i32 %539, i32 0 %542 = insertelement <2 x i32> %541, i32 %540, i32 1 %543 = bitcast <8 x i32> %71 to <32 x i8> %544 = bitcast <4 x i32> %73 to <16 x i8> %545 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %542, <32 x i8> %543, <16 x i8> %544, i32 2) %546 = extractelement <4 x float> %545, i32 0 %547 = extractelement <4 x float> %545, i32 1 %548 = extractelement <4 x float> %545, i32 2 %549 = fmul float %546, %29 %550 = fadd float %549, %503 %551 = fmul float %547, %29 %552 = fadd float %551, %504 %553 = fmul float %548, %29 %554 = fadd float %553, %505 %555 = fmul float %422, %550 %556 = fadd float %555, %536 %557 = fmul float %424, %552 %558 = fadd float %557, %537 %559 = fmul float %426, %554 %560 = fadd float %559, %538 %561 = fsub float -0.000000e+00, %556 %562 = fmul float %512, %535 %563 = fadd float %562, %561 %564 = fsub float -0.000000e+00, %558 %565 = fmul float %513, %535 %566 = fadd float %565, %564 %567 = fsub float -0.000000e+00, %560 %568 = fmul float %514, %535 %569 = fadd float %568, %567 %570 = fmul float %534, %563 %571 = fadd float %570, %556 %572 = fmul float %534, %566 %573 = fadd float %572, %558 %574 = fmul float %534, %569 %575 = fadd float %574, %560 %576 = call float @llvm.AMDIL.clamp.(float %533, float 0.000000e+00, float 1.000000e+00) %577 = fmul float %452, %509 %578 = fmul float %452, %510 %579 = fmul float %452, %511 %580 = fmul float %577, %522 %581 = fadd float %580, %571 %582 = fmul float %578, %522 %583 = fadd float %582, %573 %584 = fmul float %579, %522 %585 = fadd float %584, %575 %586 = call float @llvm.AMDGPU.lrp(float %576, float %550, float %581) %587 = call float @llvm.AMDGPU.lrp(float %576, float %552, float %583) %588 = call float @llvm.AMDGPU.lrp(float %576, float %554, float %585) %589 = fmul float %586, %61 %590 = fmul float %587, %61 %591 = fmul float %588, %61 %592 = fsub float -0.000000e+00, %61 %593 = fmul float %586, %592 %594 = fadd float %593, %57 %595 = fsub float -0.000000e+00, %61 %596 = fmul float %587, %595 %597 = fadd float %596, %58 %598 = fsub float -0.000000e+00, %61 %599 = fmul float %588, %598 %600 = fadd float %599, %59 %601 = fmul float %105, %105 %602 = fmul float %601, %594 %603 = fadd float %602, %589 %604 = fmul float %601, %597 %605 = fadd float %604, %590 %606 = fmul float %601, %600 %607 = fadd float %606, %591 %608 = call i32 @llvm.SI.packf16(float %603, float %605) %609 = bitcast i32 %608 to float %610 = call i32 @llvm.SI.packf16(float %607, float %.) %611 = bitcast i32 %610 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %609, float %611, float %609, float %611) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #3 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } attributes #4 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 s_load_dwordx4 s[92:95], s[4:5], 0x4 ; C0AE0504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx4 s[80:83], s[4:5], 0x10 ; C0A80510 s_load_dwordx4 s[24:27], s[4:5], 0x14 ; C08C0514 s_load_dwordx4 s[56:59], s[4:5], 0x18 ; C09C0518 s_load_dwordx4 s[68:71], s[4:5], 0x1c ; C0A2051C s_load_dwordx8 s[48:55], s[6:7], 0x0 ; C0D80700 s_load_dwordx8 s[8:15], s[6:7], 0x8 ; C0C40708 s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v35, s28, 0 ; 0447001C v_writelane_b32 v35, s29, 1 ; 0447021D v_writelane_b32 v35, s30, 2 ; 0447041E v_writelane_b32 v35, s31, 3 ; 0447061F v_writelane_b32 v35, s32, 4 ; 04470820 v_writelane_b32 v35, s33, 5 ; 04470A21 v_writelane_b32 v35, s34, 6 ; 04470C22 v_writelane_b32 v35, s35, 7 ; 04470E23 s_load_dwordx8 s[28:35], s[6:7], 0x18 ; C0CE0718 s_load_dwordx8 s[84:91], s[6:7], 0x20 ; C0EA0720 s_load_dwordx8 s[40:47], s[6:7], 0x28 ; C0D40728 s_load_dwordx8 s[60:67], s[6:7], 0x30 ; C0DE0730 s_load_dwordx8 s[72:79], s[6:7], 0x38 ; C0E40738 image_sample v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[92:95] ; F0800700 02E20402 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v7, 2.0, v5, -1.0 ; D2820007 03CE0AF4 v_mad_f32 v8, 2.0, v4, -1.0 ; D2820008 03CE08F4 v_mul_f32_e32 v9, v8, v8 ; 10121108 v_mad_f32 v9, v7, v7, v9 ; D2820009 04260F07 v_mad_f32 v4, 2.0, v6, -1.0 ; D2820004 03CE0CF4 v_mad_f32 v5, v4, v4, v9 ; D2820005 04260904 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v6, v5, v8 ; 100C1105 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_interp_p1_f32 v8, v0, 1, 6, [m0] ; C8201900 v_interp_p2_f32 v8, [v8], v1, 1, 6, [m0] ; C8211901 v_mul_f32_e32 v8, v8, v7 ; 10100F08 v_interp_p1_f32 v9, v0, 1, 5, [m0] ; C8241500 v_interp_p2_f32 v9, [v9], v1, 1, 5, [m0] ; C8251501 v_mad_f32 v8, v6, v9, v8 ; D2820008 04221306 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_interp_p1_f32 v5, v0, 1, 7, [m0] ; C8141D00 v_interp_p2_f32 v5, [v5], v1, 1, 7, [m0] ; C8151D01 v_mad_f32 v5, v4, v5, v8 ; D2820005 04220B04 v_interp_p1_f32 v8, v0, 0, 6, [m0] ; C8201800 v_interp_p2_f32 v8, [v8], v1, 0, 6, [m0] ; C8211801 v_mul_f32_e32 v8, v8, v7 ; 10100F08 v_interp_p1_f32 v9, v0, 0, 5, [m0] ; C8241400 v_interp_p2_f32 v9, [v9], v1, 0, 5, [m0] ; C8251401 v_mad_f32 v8, v6, v9, v8 ; D2820008 04221306 v_interp_p1_f32 v9, v0, 0, 7, [m0] ; C8241C00 v_interp_p2_f32 v9, [v9], v1, 0, 7, [m0] ; C8251C01 v_mad_f32 v8, v4, v9, v8 ; D2820008 04221304 v_mul_f32_e32 v9, v8, v8 ; 10121108 v_mad_f32 v9, v5, v5, v9 ; D2820009 04260B05 v_interp_p1_f32 v10, v0, 2, 6, [m0] ; C8281A00 v_interp_p2_f32 v10, [v10], v1, 2, 6, [m0] ; C8291A01 v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_interp_p1_f32 v10, v0, 2, 5, [m0] ; C8281600 v_interp_p2_f32 v10, [v10], v1, 2, 5, [m0] ; C8291601 v_mad_f32 v6, v6, v10, v7 ; D2820006 041E1506 v_interp_p1_f32 v7, v0, 2, 7, [m0] ; C81C1E00 v_interp_p2_f32 v7, [v7], v1, 2, 7, [m0] ; C81D1E01 v_mad_f32 v4, v4, v7, v6 ; D2820004 041A0F04 v_mad_f32 v6, v4, v4, v9 ; D2820006 04260904 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v7, v6, v8 ; 100E1106 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_mul_f32_e32 v10, v9, v9 ; 10141309 v_mad_f32 v10, v8, v8, v10 ; D282000A 042A1108 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_mad_f32 v10, v11, v11, v10 ; D282000A 042A170B v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v12, v10, v9 ; 1018130A v_mul_f32_e32 v12, v7, v12 ; 10181907 v_mul_f32_e32 v13, v10, v8 ; 101A110A v_mad_f32 v12, v13, v5, v12 ; D282000C 04320B0D v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v6, v10, v11 ; 100C170A v_mad_f32 v6, v6, v4, v12 ; D2820006 04320906 v_add_f32_e32 v12, v6, v6 ; 06180D06 v_mul_f32_e32 v13, v12, v5 ; 101A0B0C v_mad_f32 v8, v8, v10, -v13 ; D2820008 84361508 v_mul_f32_e32 v13, v12, v7 ; 101A0F0C v_mad_f32 v9, v9, v10, -v13 ; D2820009 84361509 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_buffer_load_dword s5, s[0:3], 0x1d ; C202811D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v13, s5, s5 ; D210000D 00000A05 v_mad_f32 v13, s4, s4, v13 ; D282000D 04340804 s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, s6, s6, v13 ; D282000D 04340C06 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e64 v14, -s4, v13 ; D210000E 20021A04 v_mul_f32_e32 v9, v9, v14 ; 10121D09 v_mul_f32_e64 v15, -s5, v13 ; D210000F 20021A05 v_mad_f32 v8, v15, -v8, -v9 ; D2820008 C426110F v_mul_f32_e32 v9, v12, v4 ; 1012090C v_mad_f32 v9, v11, v10, -v9 ; D2820009 8426150B v_mul_f32_e64 v10, -s6, v13 ; D210000A 20021A06 v_mad_f32 v8, -v10, v9, v8 ; D2820008 2422130A v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_mov_b32_e32 v9, 0xbc23d70a ; 7E1202FF BC23D70A v_add_f32_e32 v9, v8, v9 ; 06121308 v_cmp_ge_f32_e64 s[8:9], v9, 0 ; D00C0008 00010109 v_cndmask_b32_e64 v9, 0, -1, s[8:9] ; D2000809 00218280 v_cmp_ne_i32_e64 s[8:9], v9, 0 ; D10A0008 00010109 v_mov_b32_e32 v9, 0x3c23d70a ; 7E1202FF 3C23D70A v_cndmask_b32_e64 v8, v9, v8, s[8:9] ; D2000008 08221109 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_log_f32_e32 v8, v8 ; 7E104F08 image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[84:91], s[80:83] ; F0800F00 02951002 s_buffer_load_dword s7, s[0:3], 0x6f ; C203816F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_max_f32_e32 v9, s7, v19 ; 20122607 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s7, v9 ; 10121207 v_mul_legacy_f32_e32 v8, v9, v8 ; 0E101109 v_exp_f32_e32 v8, v8 ; 7E104B08 v_mul_f32_e32 v9, v14, v7 ; 10120F0E v_mad_f32 v9, v5, v15, v9 ; D2820009 04261F05 v_mad_f32 v9, v4, v10, v9 ; D2820009 04261504 v_add_f32_e64 v10, 0, v9 clamp ; D206080A 00021280 v_mul_f32_e32 v8, v8, v10 ; 10101508 v_interp_p1_f32 v10, v0, 0, 8, [m0] ; C8282000 v_interp_p2_f32 v10, [v10], v1, 0, 8, [m0] ; C8292001 v_mov_b32_e32 v11, 0xba800000 ; 7E1602FF BA800000 v_add_f32_e32 v13, v11, v10 ; 061A150B v_interp_p1_f32 v20, v0, 2, 8, [m0] ; C8502200 v_interp_p2_f32 v20, [v20], v1, 2, 8, [m0] ; C8512201 v_add_f32_e32 v12, 0, v20 ; 06182880 v_interp_p1_f32 v21, v0, 1, 8, [m0] ; C8542100 v_interp_p2_f32 v21, [v21], v1, 1, 8, [m0] ; C8552101 v_add_f32_e32 v14, 0, v21 ; 061C2A80 image_sample_c v22, 1, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[72:79], s[68:71] ; F0A00100 0232160C v_mov_b32_e32 v23, 0x3dfd1fa4 ; 7E2E02FF 3DFD1FA4 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v22, 0x3dfd1fa4, v22 ; 102C2CFF 3DFD1FA4 v_add_f32_e32 v24, 0x3a800000, v10 ; 063014FF 3A800000 v_mov_b32_e32 v25, v12 ; 7E32030C v_mov_b32_e32 v26, v13 ; 7E34030D v_mov_b32_e32 v27, v14 ; 7E36030E v_mov_b32_e32 v28, v15 ; 7E38030F v_mov_b32_e32 v26, v24 ; 7E340318 v_mov_b32_e32 v27, v14 ; 7E36030E image_sample_c v24, 1, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[72:79], s[68:71] ; F0A00100 02321819 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, v24, v23, v22 ; D2820016 045A2F18 v_add_f32_e32 v24, 0, v10 ; 06301480 v_mov_b32_e32 v29, v12 ; 7E3A030C v_mov_b32_e32 v30, v13 ; 7E3C030D v_mov_b32_e32 v31, v14 ; 7E3E030E v_mov_b32_e32 v32, v15 ; 7E40030F v_mov_b32_e32 v30, v24 ; 7E3C0318 v_add_f32_e32 v11, v11, v21 ; 06162B0B v_mov_b32_e32 v31, v11 ; 7E3E030B image_sample_c v24, 1, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[72:79], s[68:71] ; F0A00100 0232181D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, v24, v23, v22 ; D2820016 045A2F18 v_add_f32_e32 v31, 0x3a800000, v21 ; 063E2AFF 3A800000 image_sample_c v24, 1, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[72:79], s[68:71] ; F0A00100 0232181D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, v24, v23, v22 ; D2820016 045A2F18 v_mov_b32_e32 v14, v31 ; 7E1C031F image_sample_c v23, 1, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[72:79], s[68:71] ; F0A00100 0232170C v_mov_b32_e32 v24, 0x3d996882 ; 7E3002FF 3D996882 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v23, 0x3d996882, v23 ; 102E2EFF 3D996882 v_mov_b32_e32 v27, v31 ; 7E36031F image_sample_c v29, 1, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[72:79], s[68:71] ; F0A00100 02321D19 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, v29, v24, v23 ; D2820017 045E311D v_mov_b32_e32 v27, v11 ; 7E36030B image_sample_c v25, 1, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[72:79], s[68:71] ; F0A00100 02321919 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, v25, v24, v23 ; D2820017 045E3119 v_mov_b32_e32 v14, v11 ; 7E1C030B image_sample_c v11, 1, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[72:79], s[68:71] ; F0A00100 02320B0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v11, v11, v24, v23 ; D282000B 045E310B v_add_f32_e32 v11, v22, v11 ; 06161716 v_mov_b32_e32 v12, 0x80000000 ; 7E1802FF 80000000 v_add_f32_e32 v23, v12, v21 ; 062E2B0C v_add_f32_e32 v22, v12, v10 ; 062C150C v_add_f32_e32 v21, v12, v20 ; 062A290C image_sample_c v10, 1, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[72:79], s[68:71] ; F0A00100 02320A15 v_mov_b32_e32 v13, 0x3e52efb3 ; 7E1A02FF 3E52EFB3 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v13, v10, v11 ; D282000A 042E150D v_interp_p1_f32 v11, v0, 1, 4, [m0] ; C82C1100 v_interp_p2_f32 v11, [v11], v1, 1, 4, [m0] ; C82D1101 v_interp_p1_f32 v13, v0, 2, 4, [m0] ; C8341200 v_interp_p2_f32 v13, [v13], v1, 2, 4, [m0] ; C8351201 v_mov_b32_e32 v14, 0xc69c4000 ; 7E1C02FF C69C4000 v_add_f32_e32 v13, v13, v14 ; 061A1D0D v_rcp_f32_e32 v14, s6 ; 7E1C5406 v_mul_f32_e64 v13, -v13, v14 ; D210000D 20021D0D v_mad_f32 v11, s5, v13, v11 ; D282000B 042E1A05 s_buffer_load_dword s5, s[0:3], 0x1b ; C202811B s_buffer_load_dword s6, s[0:3], 0x5b ; C203015B s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v14, s6 ; 7E1C5406 v_mad_f32 v21, v11, v14, s5 ; D2820015 00161D0B v_interp_p1_f32 v15, v0, 0, 4, [m0] ; C83C1000 v_interp_p2_f32 v15, [v15], v1, 0, 4, [m0] ; C83D1001 v_mad_f32 v13, s4, v13, v15 ; D282000D 043E1A04 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v20, v13, v14, s4 ; D2820014 00121D0D image_sample v15, 2, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[60:67], s[56:59] ; F0800200 01CF0F14 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v21, v11, v14, s4 ; D2820015 00121D0B s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v20, v13, v14, s4 ; D2820014 00121D0D image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[60:67], s[56:59] ; F0800100 01CF0B14 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, v15, v11 ; 101A170F s_buffer_load_dword s4, s[0:3], 0x71 ; C2020171 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v14, s4, v13 ; 101C1A04 v_mul_f32_e32 v14, v14, v10 ; 101C150E v_mul_f32_e32 v20, v8, v14 ; 10281D08 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s4, v20 ; 10282804 s_buffer_load_dword s5, s[0:3], 0x6c ; C202816C s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v21, s5, v16 ; 202A2005 v_mul_f32_e32 v20, v20, v21 ; 10282B14 s_buffer_load_dword s5, s[0:3], 0x6e ; C202816E s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v22, s5, v18 ; 202C2405 v_sub_f32_e32 v23, 1.0, v22 ; 082E2CF2 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[36:39] ; F0800700 012C1802 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v27, v25, v23 ; 10362F19 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v27, v22, s5, v27 ; D282001B 046C0B16 v_mul_f32_e32 v20, v27, v20 ; 1028291B v_add_f32_e64 v27, 0, v6 clamp ; D206081B 00020C80 v_mov_b32_e32 v28, 0.5 ; 7E3802F0 image_sample v[27:28], 5, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[40:47], s[24:27] ; F0800500 00CA1B1B image_sample v[2:3], 12, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[20:23] ; F0800C00 00A70202 s_buffer_load_dword s5, s[0:3], 0x6a ; C202816A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_max_f32_e32 v6, s5, v2 ; 200C0405 v_max_f32_e32 v29, v6, v28 ; 203A3906 v_mul_f32_e32 v30, v29, v20 ; 103C291D s_buffer_load_dword s5, s[0:3], 0x58 ; C2028158 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s5, v7 ; 100E0E05 s_buffer_load_dword s5, s[0:3], 0x59 ; C2028159 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s5, v5, v7 ; D2820005 041E0A05 s_buffer_load_dword s5, s[0:3], 0x5a ; C202815A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s5, v4, v5 ; D2820005 04160805 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 s_buffer_load_dword s5, s[0:3], 0x51 ; C2028151 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s5, v5 ; 100E0A05 v_mad_f32 v11, -v11, v15, 1.0 ; D282000B 23CA1F0B v_sub_f32_e32 v15, 1.0, v10 ; 081E14F2 v_max_f32_e32 v11, v11, v15 ; 20161F0B s_buffer_load_dword s5, s[0:3], 0x55 ; C2028155 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v15, s5, v11 ; 101E1605 v_interp_p1_f32 v31, v0, 0, 1, [m0] ; C87C0400 v_interp_p2_f32 v31, [v31], v1, 0, 1, [m0] ; C87D0401 v_mul_f32_e32 v15, v31, v15 ; 101E1F1F v_mad_f32 v7, v7, v31, v15 ; D2820007 043E3F07 v_mad_f32 v9, 0.5, v9, 0.5 ; D2820009 03C212F0 v_interp_p1_f32 v15, v0, 3, 8, [m0] ; C83C2300 v_interp_p2_f32 v15, [v15], v1, 3, 8, [m0] ; C83D2301 v_cmp_ge_f32_e64 s[6:7], -v15, 0 ; D00C0006 2001010F v_cndmask_b32_e64 v15, 0, -1, s[6:7] ; D200080F 00198280 v_cmp_ne_i32_e64 s[6:7], v15, 0 ; D10A0006 0001010F v_cndmask_b32_e64 v15, 1.0, v12, s[6:7] ; D200080F 101A18F2 v_mul_f32_e32 v15, v15, v10 ; 101E150F v_mul_f32_e32 v9, v9, v15 ; 10121F09 v_mad_f32 v7, v9, v14, v7 ; D2820007 041E1D09 v_interp_p1_f32 v15, v0, 3, 0, [m0] ; C83C0300 v_interp_p2_f32 v15, [v15], v1, 3, 0, [m0] ; C83D0301 v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200 v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201 v_readlane_b32 s8, v35, 0 ; 02110123 v_readlane_b32 s9, v35, 1 ; 02130323 v_readlane_b32 s10, v35, 2 ; 02150523 v_readlane_b32 s11, v35, 3 ; 02170723 v_readlane_b32 s12, v35, 4 ; 02190923 v_readlane_b32 s13, v35, 5 ; 021B0B23 v_readlane_b32 s14, v35, 6 ; 021D0D23 v_readlane_b32 s15, v35, 7 ; 021F0F23 s_nop 2 ; BF800002 image_sample v[32:34], 7, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[16:19] ; F0800700 0082200E s_buffer_load_dword s5, s[0:3], 0xf ; C202810F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v14, v33, s5, v25 ; D282000E 04640B21 v_mad_f32 v7, v7, v14, v30 ; D2820007 047A1D07 v_mad_f32 v15, v20, v29, -v7 ; D282000F 841E3B14 v_mad_f32 v7, v6, v15, v7 ; D2820007 041E1F06 v_max_f32_e32 v4, v12, v4 ; 2008090C s_buffer_load_dword s6, s[0:3], 0x6d ; C203016D s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v12, s6, v17 ; 20182206 v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00 v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01 v_mul_f32_e32 v15, v15, v12 ; 101E190F v_mul_f32_e32 v15, v15, v4 ; 101E090F v_mad_f32 v7, v15, v27, v7 ; D2820007 041E370F s_buffer_load_dword s6, s[0:3], 0x6b ; C203016B s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v2, s6, v3 ; 20040606 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_mul_f32_e32 v7, v7, v3 ; 100E0707 v_mad_f32 v7, v2, v14, v7 ; D2820007 041E1D02 s_buffer_load_dword s6, s[0:3], 0x75 ; C2030175 s_buffer_load_dword s7, s[0:3], 0x78 ; C2038178 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v14, s6 ; 7E1C0206 v_mad_f32 v14, -v7, s7, v14 ; D282000E 24380F07 v_mul_f32_e32 v7, s7, v7 ; 100E0E07 v_interp_p1_f32 v15, v0, 3, 3, [m0] ; C83C0F00 v_interp_p2_f32 v15, [v15], v1, 3, 3, [m0] ; C83D0F01 v_mul_f32_e32 v15, v15, v15 ; 101E1F0F v_mad_f32 v7, v15, v14, v7 ; D2820007 041E1D0F s_buffer_load_dword s6, s[0:3], 0x70 ; C2030170 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v14, s6, v13 ; 101C1A06 v_mul_f32_e32 v14, v14, v10 ; 101C150E v_mul_f32_e32 v16, v8, v14 ; 10201D08 v_mul_f32_e32 v16, s4, v16 ; 10202004 v_mul_f32_e32 v16, v16, v21 ; 10202B10 v_mul_f32_e32 v17, v24, v23 ; 10222F18 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, v22, s6, v17 ; D2820011 04440D16 v_mul_f32_e32 v16, v17, v16 ; 10202111 v_mul_f32_e32 v17, v29, v16 ; 1022211D s_buffer_load_dword s6, s[0:3], 0x50 ; C2030150 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v18, s6, v5 ; 10240A06 s_buffer_load_dword s6, s[0:3], 0x54 ; C2030154 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v19, s6, v11 ; 10261606 v_mul_f32_e32 v19, v31, v19 ; 1026271F v_mad_f32 v18, v18, v31, v19 ; D2820012 044E3F12 v_mad_f32 v14, v9, v14, v18 ; D282000E 044A1D09 v_mad_f32 v18, v32, s5, v24 ; D2820012 04600B20 v_mad_f32 v14, v14, v18, v17 ; D282000E 0446250E v_mad_f32 v16, v16, v29, -v14 ; D2820010 843A3B10 v_mad_f32 v14, v6, v16, v14 ; D282000E 043A2106 v_interp_p1_f32 v16, v0, 0, 3, [m0] ; C8400C00 v_interp_p2_f32 v16, [v16], v1, 0, 3, [m0] ; C8410C01 v_mul_f32_e32 v16, v16, v12 ; 10201910 v_mul_f32_e32 v16, v16, v4 ; 10200910 v_mad_f32 v14, v16, v27, v14 ; D282000E 043A3710 v_mul_f32_e32 v14, v14, v3 ; 101C070E v_mad_f32 v14, v2, v18, v14 ; D282000E 043A2502 s_buffer_load_dword s6, s[0:3], 0x74 ; C2030174 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v16, s6 ; 7E200206 v_mad_f32 v16, -v14, s7, v16 ; D2820010 24400F0E v_mul_f32_e32 v14, s7, v14 ; 101C1C07 v_mad_f32 v14, v15, v16, v14 ; D282000E 043A210F v_cvt_pkrtz_f16_f32_e32 v7, v14, v7 ; 5E0E0F0E s_buffer_load_dword s6, s[0:3], 0x72 ; C2030172 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s6, v13 ; 101A1A06 v_mul_f32_e32 v10, v13, v10 ; 1014150D v_mul_f32_e32 v8, v8, v10 ; 10101508 v_mul_f32_e32 v8, s4, v8 ; 10101004 v_mul_f32_e32 v8, v8, v21 ; 10102B08 v_mul_f32_e32 v13, v26, v23 ; 101A2F1A s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v22, s4, v13 ; D282000D 04340916 v_mul_f32_e32 v8, v13, v8 ; 1010110D v_mul_f32_e32 v13, v29, v8 ; 101A111D s_buffer_load_dword s4, s[0:3], 0x52 ; C2020152 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 s_buffer_load_dword s4, s[0:3], 0x56 ; C2020156 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s4, v11 ; 10161604 v_mul_f32_e32 v11, v31, v11 ; 1016171F v_mad_f32 v5, v5, v31, v11 ; D2820005 042E3F05 v_mad_f32 v5, v9, v10, v5 ; D2820005 04161509 v_mad_f32 v9, v34, s5, v26 ; D2820009 04680B22 v_mad_f32 v5, v5, v9, v13 ; D2820005 04361305 v_mad_f32 v8, v8, v29, -v5 ; D2820008 84163B08 v_mad_f32 v5, v6, v8, v5 ; D2820005 04161106 v_interp_p1_f32 v6, v0, 2, 3, [m0] ; C8180E00 v_interp_p2_f32 v6, [v6], v1, 2, 3, [m0] ; C8190E01 v_mul_f32_e32 v6, v6, v12 ; 100C1906 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mad_f32 v4, v4, v27, v5 ; D2820004 04163704 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mad_f32 v2, v2, v9, v3 ; D2820002 040E1302 s_buffer_load_dword s4, s[0:3], 0x76 ; C2020176 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_mad_f32 v3, -v2, s7, v3 ; D2820003 240C0F02 v_mul_f32_e32 v2, s7, v2 ; 10040407 v_mad_f32 v2, v15, v3, v2 ; D2820002 040A070F v_interp_p1_f32 v3, v0, 3, 4, [m0] ; C80C1300 v_interp_p2_f32 v3, [v3], v1, 3, 4, [m0] ; C80D1301 s_buffer_load_dword s4, s[0:3], 0x77 ; C2020177 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v3 ; 10000604 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ge_f32_e64 s[0:1], -|s0|, 0 ; D00C0100 20010000 v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; D2000801 00018280 v_cmp_ne_i32_e64 s[0:1], v1, 0 ; D10A0000 00010101 v_cndmask_b32_e64 v0, v0, 1.0, s[0:1] ; D2000000 0001E500 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v7, v0, v7, v0 ; F8001C0F 00070007 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[9] DCL OUT[3], GENERIC[10] DCL OUT[4], GENERIC[11] DCL OUT[5], GENERIC[12] DCL OUT[6], GENERIC[13] DCL OUT[7], GENERIC[14] DCL OUT[8], GENERIC[15] DCL OUT[9], GENERIC[16] DCL OUT[10], GENERIC[17] DCL CONST[0..241] DCL TEMP[0..16], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, -128.0000, 1.0000, -64.0000} IMM[1] FLT32 { -0.0159, 0.0159, 0.0000, 765.0059} IMM[2] INT32 {2, 1, 0, 0} IMM[3] FLT32 { 0.0001, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: ADD TEMP[1], IMM[0].yyyy, IN[1] 2: FSLT TEMP[2], TEMP[1], CONST[0].xxxx 3: AND TEMP[3], TEMP[2], IMM[0].zzzz 4: ABS TEMP[4], TEMP[1] 5: ADD TEMP[1], TEMP[4], -TEMP[3] 6: ADD TEMP[1], TEMP[1], IMM[0].wwww 7: MAD TEMP[3].xyz, TEMP[3].xzww, -CONST[0].zzzz, CONST[0].yyyy 8: FSLT TEMP[4], TEMP[1], CONST[0].xxxx 9: AND TEMP[4], TEMP[4], IMM[0].zzzz 10: ABS TEMP[5], TEMP[1] 11: ADD TEMP[1], TEMP[5], -TEMP[4] 12: MAD TEMP[5].xy, TEMP[1].xzzz, IMM[1].xxxx, IMM[0].zzzz 13: MAD TEMP[6].x, TEMP[1].yyyy, IMM[1].xxxx, TEMP[5].xxxx 14: MOV TEMP[5].w, TEMP[6].xxxx 15: MAD TEMP[6].x, TEMP[1].wwww, IMM[1].xxxx, TEMP[5].yyyy 16: MOV TEMP[6].z, TEMP[6].xxxx 17: MAD TEMP[4], TEMP[4], -CONST[0].zzzz, CONST[0].yyyy 18: MUL TEMP[1], TEMP[1], IMM[1].yyyy 19: MOV TEMP[5].xz, TEMP[1].xxyx 20: DP3 TEMP[7].x, TEMP[5].xzww, TEMP[5].xzww 21: RSQ TEMP[7].x, TEMP[7].xxxx 22: MUL TEMP[7].xyz, TEMP[5].xzww, TEMP[7].xxxx 23: MUL TEMP[5].xy, TEMP[4].xyyy, TEMP[7].xyyy 24: MUL TEMP[7].x, TEMP[3].xxxx, TEMP[7].zzzz 25: MOV TEMP[5].z, TEMP[7].xxxx 26: MOV TEMP[6].xy, TEMP[1].zwzz 27: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz 28: RSQ TEMP[7].x, TEMP[7].xxxx 29: MUL TEMP[7].xyz, TEMP[6].xyzz, TEMP[7].xxxx 30: MUL TEMP[1].xy, TEMP[4].zwww, TEMP[7].xyyy 31: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[7].zzzz 32: MOV TEMP[1].w, TEMP[7].xxxx 33: MUL TEMP[7].xyz, IN[6].xyzz, CONST[66].xxxx 34: ADD TEMP[1].xyz, TEMP[7].xyzz, TEMP[1].xyww 35: ADD TEMP[3].xy, CONST[0].yyyy, IN[2].xyyy 36: MUL TEMP[3].xy, TEMP[3].xyyy, IMM[1].zzzz 37: ADD TEMP[8].x, TEMP[3].yyyy, TEMP[3].xxxx 38: ADD TEMP[8].x, -TEMP[8].xxxx, CONST[0].yyyy 39: MUL TEMP[4].xyz, IMM[1].wwww, IN[3].zyxx 40: MOV TEMP[9].xyz, TEMP[4].xyzx 41: F2I TEMP[10].x, TEMP[4].yyyy 42: UADD TEMP[10].x, TEMP[10].xxxx, IMM[2].xxxx 43: UARL ADDR[0].x, TEMP[10].xxxx 44: UARL ADDR[0].x, TEMP[10].xxxx 45: MUL TEMP[4], TEMP[3].yyyy, CONST[ADDR[0].x+67] 46: F2I TEMP[10].x, TEMP[9].xxxx 47: UADD TEMP[10].x, TEMP[10].xxxx, IMM[2].xxxx 48: UARL ADDR[0].x, TEMP[10].xxxx 49: UARL ADDR[0].x, TEMP[10].xxxx 50: MAD TEMP[4], CONST[ADDR[0].x+67], TEMP[3].xxxx, TEMP[4] 51: F2I TEMP[10].x, TEMP[9].zzzz 52: UADD TEMP[10].x, TEMP[10].xxxx, IMM[2].xxxx 53: UARL ADDR[0].x, TEMP[10].xxxx 54: UARL ADDR[0].x, TEMP[10].xxxx 55: MAD TEMP[4], CONST[ADDR[0].x+67], TEMP[8].xxxx, TEMP[4] 56: DP3 TEMP[10].x, TEMP[1].xyzz, TEMP[4].xyzz 57: MOV TEMP[6].z, TEMP[10].xxxx 58: MOV TEMP[10].w, IN[0].wwww 59: MAD TEMP[10].xyz, IN[5].xyzz, CONST[66].xxxx, IN[0].xyzz 60: DP4 TEMP[11].x, TEMP[10], TEMP[4] 61: MOV TEMP[11].z, TEMP[11].xxxx 62: ADD TEMP[5].xyz, TEMP[7].xyzz, TEMP[5].xyzz 63: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[4].xyzz 64: MOV TEMP[4].z, TEMP[7].xxxx 65: F2I TEMP[7].x, TEMP[9].yyyy 66: UARL ADDR[0].x, TEMP[7].xxxx 67: UARL ADDR[0].x, TEMP[7].xxxx 68: MUL TEMP[7], TEMP[3].yyyy, CONST[ADDR[0].x+67] 69: F2I TEMP[12].x, TEMP[9].xxxx 70: UARL ADDR[0].x, TEMP[12].xxxx 71: UARL ADDR[0].x, TEMP[12].xxxx 72: MAD TEMP[7], CONST[ADDR[0].x+67], TEMP[3].xxxx, TEMP[7] 73: F2I TEMP[12].x, TEMP[9].zzzz 74: UARL ADDR[0].x, TEMP[12].xxxx 75: UARL ADDR[0].x, TEMP[12].xxxx 76: MAD TEMP[7], CONST[ADDR[0].x+67], TEMP[8].xxxx, TEMP[7] 77: DP3 TEMP[6].x, TEMP[1].xyzz, TEMP[7].xyzz 78: DP3 TEMP[4].x, TEMP[5].xyzz, TEMP[7].xyzz 79: F2I TEMP[12].x, TEMP[9].yyyy 80: UADD TEMP[12].x, TEMP[12].xxxx, IMM[2].yyyy 81: UARL ADDR[0].x, TEMP[12].xxxx 82: UARL ADDR[0].x, TEMP[12].xxxx 83: MUL TEMP[12], TEMP[3].yyyy, CONST[ADDR[0].x+67] 84: F2I TEMP[13].x, TEMP[9].xxxx 85: UADD TEMP[13].x, TEMP[13].xxxx, IMM[2].yyyy 86: UARL ADDR[0].x, TEMP[13].xxxx 87: UARL ADDR[0].x, TEMP[13].xxxx 88: MAD TEMP[12], CONST[ADDR[0].x+67], TEMP[3].xxxx, TEMP[12] 89: F2I TEMP[9].x, TEMP[9].zzzz 90: UADD TEMP[9].x, TEMP[9].xxxx, IMM[2].yyyy 91: UARL ADDR[0].x, TEMP[9].xxxx 92: UARL ADDR[0].x, TEMP[9].xxxx 93: MAD TEMP[12], CONST[ADDR[0].x+67], TEMP[8].xxxx, TEMP[12] 94: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[12].xyzz 95: MOV TEMP[6].y, TEMP[8].xxxx 96: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[12].xyzz 97: MOV TEMP[4].y, TEMP[5].xxxx 98: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[4].xyzz 99: RSQ TEMP[5].x, TEMP[1].xxxx 100: MUL TEMP[0].xyz, TEMP[4].xyzz, TEMP[5].xxxx 101: DP3 TEMP[1].x, TEMP[6].xyzz, TEMP[6].xyzz 102: RSQ TEMP[5].x, TEMP[1].xxxx 103: MUL TEMP[5].xyz, TEMP[6].xyzz, TEMP[5].xxxx 104: MUL TEMP[1].xyz, TEMP[6].yzxx, TEMP[4].zxyy 105: MAD TEMP[1].xyz, TEMP[4].yzxx, TEMP[6].zxyy, -TEMP[1].xyzz 106: MUL TEMP[1].xyz, TEMP[3].zzzz, TEMP[1].xyzz 107: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz 108: RSQ TEMP[4].x, TEMP[4].xxxx 109: MUL TEMP[4].xyz, TEMP[1].xyzz, TEMP[4].xxxx 110: DP4 TEMP[11].x, TEMP[10], TEMP[7] 111: DP4 TEMP[6].x, TEMP[10], TEMP[12] 112: MOV TEMP[11].y, TEMP[6].xxxx 113: ADD TEMP[1].xyz, -TEMP[11].xyzz, CONST[14].xyzz 114: DP3 TEMP[6].x, TEMP[1].xyzz, TEMP[1].xyzz 115: RSQ TEMP[6].x, TEMP[6].xxxx 116: MUL TEMP[6].xyz, TEMP[1].xyzz, TEMP[6].xxxx 117: MOV TEMP[7].xyz, TEMP[11].xyzx 118: MOV TEMP[11].w, CONST[0].yyyy 119: DP4 TEMP[8].x, TEMP[11], CONST[57] 120: DP4 TEMP[9].x, TEMP[11], CONST[54] 121: MOV TEMP[1].y, TEMP[9].xxxx 122: DP4 TEMP[9].x, TEMP[11], CONST[55] 123: MOV TEMP[1].z, TEMP[9].xxxx 124: DP4 TEMP[9].x, TEMP[11], CONST[56] 125: MOV TEMP[1].w, TEMP[9].xxxx 126: RCP TEMP[3].x, TEMP[8].xxxx 127: MUL TEMP[9].xyz, TEMP[1].yzww, TEMP[3].xxxx 128: DP4 TEMP[10].x, TEMP[11], CONST[8] 129: MOV TEMP[1].y, TEMP[10].xxxx 130: DP4 TEMP[12].x, TEMP[11], CONST[9] 131: MOV TEMP[1].z, TEMP[12].xxxx 132: DP4 TEMP[13].x, TEMP[11], CONST[11] 133: MOV TEMP[1].w, TEMP[13].xxxx 134: MOV TEMP[14].xyw, TEMP[1].yzyw 135: MOV TEMP[9].w, TEMP[8].xxxx 136: DP4 TEMP[8].x, TEMP[11], CONST[10] 137: MOV TEMP[14].z, TEMP[8].xxxx 138: DP4 TEMP[15].x, TEMP[11], CONST[13] 139: MOV TEMP[7].w, TEMP[15].xxxx 140: DP4 TEMP[15].x, IN[4], CONST[48] 141: DP4 TEMP[16].x, IN[4], CONST[49] 142: MOV TEMP[15].y, TEMP[16].xxxx 143: DP4 TEMP[16].x, IN[4], CONST[52] 144: MOV TEMP[15].z, TEMP[16].xxxx 145: DP4 TEMP[16].x, IN[4], CONST[53] 146: MOV TEMP[15].w, TEMP[16].xxxx 147: DP4 TEMP[16].x, IN[4], CONST[50] 148: MOV TEMP[2].w, TEMP[16].xxxx 149: DP4 TEMP[16].x, IN[4], CONST[51] 150: MOV TEMP[6].w, TEMP[16].xxxx 151: RCP TEMP[1].x, TEMP[13].xxxx 152: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[1].xxxx 153: MOV TEMP[4].w, TEMP[12].xxxx 154: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[1].xxxx 155: MOV TEMP[5].w, TEMP[10].xxxx 156: MUL TEMP[3].xyz, CONST[15].xyzz, CONST[58].xyzz 157: UIF CONST[238].xxxx :0 158: ENDIF 159: UIF CONST[239].xxxx :0 160: ENDIF 161: UIF CONST[240].xxxx :0 162: ENDIF 163: UIF CONST[241].xxxx :0 164: ENDIF 165: ADD TEMP[1].xyz, -TEMP[11].xyzz, CONST[2].xyzz 166: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 167: RSQ TEMP[10].x, TEMP[1].xxxx 168: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[1].xxxx 169: CMP TEMP[1].x, -TEMP[1].xxxx, TEMP[10].xxxx, IMM[0].xxxx 170: MAD TEMP[1].x, TEMP[1].xxxx, CONST[16].wwww, CONST[16].xxxx 171: MOV_SAT TEMP[1].x, TEMP[1].xxxx 172: MIN TEMP[1].x, TEMP[1].xxxx, CONST[16].zzzz 173: MOV TEMP[3].w, TEMP[1].xxxx 174: MOV TEMP[2].xyz, CONST[15].wwww 175: MOV TEMP[1], TEMP[14] 176: MAD TEMP[8].x, TEMP[8].xxxx, CONST[0].zzzz, -TEMP[14].wwww 177: MOV TEMP[14].z, TEMP[8].xxxx 178: MOV TEMP[14].y, -TEMP[14].yyyy 179: MAD TEMP[14].xy, CONST[237].xyyy, TEMP[14].wwww, TEMP[14].xyyy 180: MOV OUT[10], TEMP[9] 181: MOV OUT[2], TEMP[15] 182: MOV OUT[3], TEMP[2] 183: MOV OUT[4], TEMP[6] 184: MOV OUT[5], TEMP[3] 185: MOV OUT[6], TEMP[7] 186: MOV OUT[0], TEMP[14] 187: MOV OUT[7], TEMP[5] 188: MOV OUT[1], TEMP[1] 189: MOV OUT[8], TEMP[4] 190: MOV OUT[9], TEMP[0] 191: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 800) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 804) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 808) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 812) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 816) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 820) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 824) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 828) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 832) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 836) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 840) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 844) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 848) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 852) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 856) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 860) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 864) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 868) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 872) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 876) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 880) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 884) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 888) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 892) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 896) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 900) %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 904) %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 908) %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 912) %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 916) %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 920) %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 924) %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 928) %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 932) %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 936) %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1056) %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3792) %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3796) %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3824) %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3840) %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3856) %98 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %99 = load <16 x i8> addrspace(2)* %98, !tbaa !0 %100 = add i32 %5, %7 %101 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %99, i32 0, i32 %100) %102 = extractelement <4 x float> %101, i32 0 %103 = extractelement <4 x float> %101, i32 1 %104 = extractelement <4 x float> %101, i32 2 %105 = extractelement <4 x float> %101, i32 3 %106 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %107 = load <16 x i8> addrspace(2)* %106, !tbaa !0 %108 = add i32 %5, %7 %109 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %107, i32 0, i32 %108) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = extractelement <4 x float> %109, i32 3 %114 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %115 = load <16 x i8> addrspace(2)* %114, !tbaa !0 %116 = add i32 %5, %7 %117 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %115, i32 0, i32 %116) %118 = extractelement <4 x float> %117, i32 0 %119 = extractelement <4 x float> %117, i32 1 %120 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %121 = load <16 x i8> addrspace(2)* %120, !tbaa !0 %122 = add i32 %5, %7 %123 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %121, i32 0, i32 %122) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 1 %126 = extractelement <4 x float> %123, i32 2 %127 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %128 = load <16 x i8> addrspace(2)* %127, !tbaa !0 %129 = add i32 %5, %7 %130 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %128, i32 0, i32 %129) %131 = extractelement <4 x float> %130, i32 0 %132 = extractelement <4 x float> %130, i32 1 %133 = extractelement <4 x float> %130, i32 2 %134 = extractelement <4 x float> %130, i32 3 %135 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 5 %136 = load <16 x i8> addrspace(2)* %135, !tbaa !0 %137 = add i32 %5, %7 %138 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %136, i32 0, i32 %137) %139 = extractelement <4 x float> %138, i32 0 %140 = extractelement <4 x float> %138, i32 1 %141 = extractelement <4 x float> %138, i32 2 %142 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 6 %143 = load <16 x i8> addrspace(2)* %142, !tbaa !0 %144 = add i32 %5, %7 %145 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %143, i32 0, i32 %144) %146 = extractelement <4 x float> %145, i32 0 %147 = extractelement <4 x float> %145, i32 1 %148 = extractelement <4 x float> %145, i32 2 %149 = fadd float -1.280000e+02, %110 %150 = fadd float -1.280000e+02, %111 %151 = fadd float -1.280000e+02, %112 %152 = fadd float -1.280000e+02, %113 %153 = fcmp olt float %149, %13 %154 = sext i1 %153 to i32 %155 = fcmp olt float %150, %13 %156 = sext i1 %155 to i32 %157 = fcmp olt float %151, %13 %158 = sext i1 %157 to i32 %159 = fcmp olt float %152, %13 %160 = sext i1 %159 to i32 %161 = bitcast i32 %154 to float %162 = bitcast i32 %156 to float %163 = bitcast i32 %158 to float %164 = bitcast i32 %160 to float %165 = bitcast float %161 to i32 %166 = and i32 %165, 1065353216 %167 = bitcast float %162 to i32 %168 = and i32 %167, 1065353216 %169 = bitcast float %163 to i32 %170 = and i32 %169, 1065353216 %171 = bitcast float %164 to i32 %172 = and i32 %171, 1065353216 %173 = bitcast i32 %166 to float %174 = bitcast i32 %168 to float %175 = bitcast i32 %170 to float %176 = bitcast i32 %172 to float %177 = call float @fabs(float %149) %178 = call float @fabs(float %150) %179 = call float @fabs(float %151) %180 = call float @fabs(float %152) %181 = fsub float -0.000000e+00, %173 %182 = fadd float %177, %181 %183 = fsub float -0.000000e+00, %174 %184 = fadd float %178, %183 %185 = fsub float -0.000000e+00, %175 %186 = fadd float %179, %185 %187 = fsub float -0.000000e+00, %176 %188 = fadd float %180, %187 %189 = fadd float %182, -6.400000e+01 %190 = fadd float %184, -6.400000e+01 %191 = fadd float %186, -6.400000e+01 %192 = fadd float %188, -6.400000e+01 %193 = fsub float -0.000000e+00, %15 %194 = fmul float %173, %193 %195 = fadd float %194, %14 %196 = fsub float -0.000000e+00, %15 %197 = fmul float %175, %196 %198 = fadd float %197, %14 %199 = fsub float -0.000000e+00, %15 %200 = fmul float %176, %199 %201 = fadd float %200, %14 %202 = fcmp olt float %189, %13 %203 = sext i1 %202 to i32 %204 = fcmp olt float %190, %13 %205 = sext i1 %204 to i32 %206 = fcmp olt float %191, %13 %207 = sext i1 %206 to i32 %208 = fcmp olt float %192, %13 %209 = sext i1 %208 to i32 %210 = bitcast i32 %203 to float %211 = bitcast i32 %205 to float %212 = bitcast i32 %207 to float %213 = bitcast i32 %209 to float %214 = bitcast float %210 to i32 %215 = and i32 %214, 1065353216 %216 = bitcast float %211 to i32 %217 = and i32 %216, 1065353216 %218 = bitcast float %212 to i32 %219 = and i32 %218, 1065353216 %220 = bitcast float %213 to i32 %221 = and i32 %220, 1065353216 %222 = bitcast i32 %215 to float %223 = bitcast i32 %217 to float %224 = bitcast i32 %219 to float %225 = bitcast i32 %221 to float %226 = call float @fabs(float %189) %227 = call float @fabs(float %190) %228 = call float @fabs(float %191) %229 = call float @fabs(float %192) %230 = fsub float -0.000000e+00, %222 %231 = fadd float %226, %230 %232 = fsub float -0.000000e+00, %223 %233 = fadd float %227, %232 %234 = fsub float -0.000000e+00, %224 %235 = fadd float %228, %234 %236 = fsub float -0.000000e+00, %225 %237 = fadd float %229, %236 %238 = fmul float %231, 0xBF90410420000000 %239 = fadd float %238, 1.000000e+00 %240 = fmul float %235, 0xBF90410420000000 %241 = fadd float %240, 1.000000e+00 %242 = fmul float %233, 0xBF90410420000000 %243 = fadd float %242, %239 %244 = fmul float %237, 0xBF90410420000000 %245 = fadd float %244, %241 %246 = fsub float -0.000000e+00, %15 %247 = fmul float %222, %246 %248 = fadd float %247, %14 %249 = fsub float -0.000000e+00, %15 %250 = fmul float %223, %249 %251 = fadd float %250, %14 %252 = fsub float -0.000000e+00, %15 %253 = fmul float %224, %252 %254 = fadd float %253, %14 %255 = fsub float -0.000000e+00, %15 %256 = fmul float %225, %255 %257 = fadd float %256, %14 %258 = fmul float %231, 0x3F90410420000000 %259 = fmul float %233, 0x3F90410420000000 %260 = fmul float %235, 0x3F90410420000000 %261 = fmul float %237, 0x3F90410420000000 %262 = fmul float %258, %258 %263 = fmul float %259, %259 %264 = fadd float %263, %262 %265 = fmul float %243, %243 %266 = fadd float %264, %265 %267 = call float @llvm.AMDGPU.rsq.clamped.f32(float %266) %268 = fmul float %258, %267 %269 = fmul float %259, %267 %270 = fmul float %243, %267 %271 = fmul float %248, %268 %272 = fmul float %251, %269 %273 = fmul float %195, %270 %274 = fmul float %260, %260 %275 = fmul float %261, %261 %276 = fadd float %275, %274 %277 = fmul float %245, %245 %278 = fadd float %276, %277 %279 = call float @llvm.AMDGPU.rsq.clamped.f32(float %278) %280 = fmul float %260, %279 %281 = fmul float %261, %279 %282 = fmul float %245, %279 %283 = fmul float %254, %280 %284 = fmul float %257, %281 %285 = fmul float %198, %282 %286 = fmul float %146, %92 %287 = fmul float %147, %92 %288 = fmul float %148, %92 %289 = fadd float %286, %283 %290 = fadd float %287, %284 %291 = fadd float %288, %285 %292 = fadd float %14, %118 %293 = fadd float %14, %119 %294 = fmul float %292, 0x3F00000000000000 %295 = fmul float %293, 0x3F00000000000000 %296 = fadd float %295, %294 %297 = fsub float -0.000000e+00, %296 %298 = fadd float %297, %14 %299 = fmul float 0x4087E80C00000000, %126 %300 = fmul float 0x4087E80C00000000, %125 %301 = fmul float 0x4087E80C00000000, %124 %302 = fptosi float %300 to i32 %303 = bitcast i32 %302 to float %304 = bitcast float %303 to i32 %305 = add i32 %304, 2 %306 = bitcast i32 %305 to float %307 = bitcast float %306 to i32 %308 = shl i32 %307, 4 %309 = add i32 %308, 1072 %310 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %309) %311 = fmul float %295, %310 %312 = shl i32 %307, 4 %313 = add i32 %312, 1076 %314 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %313) %315 = fmul float %295, %314 %316 = shl i32 %307, 4 %317 = add i32 %316, 1080 %318 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %317) %319 = fmul float %295, %318 %320 = shl i32 %307, 4 %321 = add i32 %320, 1084 %322 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %321) %323 = fmul float %295, %322 %324 = fptosi float %299 to i32 %325 = bitcast i32 %324 to float %326 = bitcast float %325 to i32 %327 = add i32 %326, 2 %328 = bitcast i32 %327 to float %329 = bitcast float %328 to i32 %330 = shl i32 %329, 4 %331 = add i32 %330, 1072 %332 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %331) %333 = fmul float %332, %294 %334 = fadd float %333, %311 %335 = shl i32 %329, 4 %336 = add i32 %335, 1076 %337 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %336) %338 = fmul float %337, %294 %339 = fadd float %338, %315 %340 = shl i32 %329, 4 %341 = add i32 %340, 1080 %342 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %341) %343 = fmul float %342, %294 %344 = fadd float %343, %319 %345 = shl i32 %329, 4 %346 = add i32 %345, 1084 %347 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %346) %348 = fmul float %347, %294 %349 = fadd float %348, %323 %350 = fptosi float %301 to i32 %351 = bitcast i32 %350 to float %352 = bitcast float %351 to i32 %353 = add i32 %352, 2 %354 = bitcast i32 %353 to float %355 = bitcast float %354 to i32 %356 = shl i32 %355, 4 %357 = add i32 %356, 1072 %358 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %357) %359 = fmul float %358, %298 %360 = fadd float %359, %334 %361 = shl i32 %355, 4 %362 = add i32 %361, 1076 %363 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %362) %364 = fmul float %363, %298 %365 = fadd float %364, %339 %366 = shl i32 %355, 4 %367 = add i32 %366, 1080 %368 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %367) %369 = fmul float %368, %298 %370 = fadd float %369, %344 %371 = shl i32 %355, 4 %372 = add i32 %371, 1084 %373 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %372) %374 = fmul float %373, %298 %375 = fadd float %374, %349 %376 = fmul float %289, %360 %377 = fmul float %290, %365 %378 = fadd float %377, %376 %379 = fmul float %291, %370 %380 = fadd float %378, %379 %381 = fmul float %139, %92 %382 = fadd float %381, %102 %383 = fmul float %140, %92 %384 = fadd float %383, %103 %385 = fmul float %141, %92 %386 = fadd float %385, %104 %387 = fmul float %382, %360 %388 = fmul float %384, %365 %389 = fadd float %387, %388 %390 = fmul float %386, %370 %391 = fadd float %389, %390 %392 = fmul float %105, %375 %393 = fadd float %391, %392 %394 = fadd float %286, %271 %395 = fadd float %287, %272 %396 = fadd float %288, %273 %397 = fmul float %394, %360 %398 = fmul float %395, %365 %399 = fadd float %398, %397 %400 = fmul float %396, %370 %401 = fadd float %399, %400 %402 = fptosi float %300 to i32 %403 = bitcast i32 %402 to float %404 = bitcast float %403 to i32 %405 = shl i32 %404, 4 %406 = add i32 %405, 1072 %407 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %406) %408 = fmul float %295, %407 %409 = shl i32 %404, 4 %410 = add i32 %409, 1076 %411 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %410) %412 = fmul float %295, %411 %413 = shl i32 %404, 4 %414 = add i32 %413, 1080 %415 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %414) %416 = fmul float %295, %415 %417 = shl i32 %404, 4 %418 = add i32 %417, 1084 %419 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %418) %420 = fmul float %295, %419 %421 = fptosi float %299 to i32 %422 = bitcast i32 %421 to float %423 = bitcast float %422 to i32 %424 = shl i32 %423, 4 %425 = add i32 %424, 1072 %426 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %425) %427 = fmul float %426, %294 %428 = fadd float %427, %408 %429 = shl i32 %423, 4 %430 = add i32 %429, 1076 %431 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %430) %432 = fmul float %431, %294 %433 = fadd float %432, %412 %434 = shl i32 %423, 4 %435 = add i32 %434, 1080 %436 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %435) %437 = fmul float %436, %294 %438 = fadd float %437, %416 %439 = shl i32 %423, 4 %440 = add i32 %439, 1084 %441 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %440) %442 = fmul float %441, %294 %443 = fadd float %442, %420 %444 = fptosi float %301 to i32 %445 = bitcast i32 %444 to float %446 = bitcast float %445 to i32 %447 = shl i32 %446, 4 %448 = add i32 %447, 1072 %449 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %448) %450 = fmul float %449, %298 %451 = fadd float %450, %428 %452 = shl i32 %446, 4 %453 = add i32 %452, 1076 %454 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %453) %455 = fmul float %454, %298 %456 = fadd float %455, %433 %457 = shl i32 %446, 4 %458 = add i32 %457, 1080 %459 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %458) %460 = fmul float %459, %298 %461 = fadd float %460, %438 %462 = shl i32 %446, 4 %463 = add i32 %462, 1084 %464 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %463) %465 = fmul float %464, %298 %466 = fadd float %465, %443 %467 = fmul float %289, %451 %468 = fmul float %290, %456 %469 = fadd float %468, %467 %470 = fmul float %291, %461 %471 = fadd float %469, %470 %472 = fmul float %394, %451 %473 = fmul float %395, %456 %474 = fadd float %473, %472 %475 = fmul float %396, %461 %476 = fadd float %474, %475 %477 = fptosi float %300 to i32 %478 = bitcast i32 %477 to float %479 = bitcast float %478 to i32 %480 = add i32 %479, 1 %481 = bitcast i32 %480 to float %482 = bitcast float %481 to i32 %483 = shl i32 %482, 4 %484 = add i32 %483, 1072 %485 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %484) %486 = fmul float %295, %485 %487 = shl i32 %482, 4 %488 = add i32 %487, 1076 %489 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %488) %490 = fmul float %295, %489 %491 = shl i32 %482, 4 %492 = add i32 %491, 1080 %493 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %492) %494 = fmul float %295, %493 %495 = shl i32 %482, 4 %496 = add i32 %495, 1084 %497 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %496) %498 = fmul float %295, %497 %499 = fptosi float %299 to i32 %500 = bitcast i32 %499 to float %501 = bitcast float %500 to i32 %502 = add i32 %501, 1 %503 = bitcast i32 %502 to float %504 = bitcast float %503 to i32 %505 = shl i32 %504, 4 %506 = add i32 %505, 1072 %507 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %506) %508 = fmul float %507, %294 %509 = fadd float %508, %486 %510 = shl i32 %504, 4 %511 = add i32 %510, 1076 %512 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %511) %513 = fmul float %512, %294 %514 = fadd float %513, %490 %515 = shl i32 %504, 4 %516 = add i32 %515, 1080 %517 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %516) %518 = fmul float %517, %294 %519 = fadd float %518, %494 %520 = shl i32 %504, 4 %521 = add i32 %520, 1084 %522 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %521) %523 = fmul float %522, %294 %524 = fadd float %523, %498 %525 = fptosi float %301 to i32 %526 = bitcast i32 %525 to float %527 = bitcast float %526 to i32 %528 = add i32 %527, 1 %529 = bitcast i32 %528 to float %530 = bitcast float %529 to i32 %531 = shl i32 %530, 4 %532 = add i32 %531, 1072 %533 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %532) %534 = fmul float %533, %298 %535 = fadd float %534, %509 %536 = shl i32 %530, 4 %537 = add i32 %536, 1076 %538 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %537) %539 = fmul float %538, %298 %540 = fadd float %539, %514 %541 = shl i32 %530, 4 %542 = add i32 %541, 1080 %543 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %542) %544 = fmul float %543, %298 %545 = fadd float %544, %519 %546 = shl i32 %530, 4 %547 = add i32 %546, 1084 %548 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %547) %549 = fmul float %548, %298 %550 = fadd float %549, %524 %551 = fmul float %289, %535 %552 = fmul float %290, %540 %553 = fadd float %552, %551 %554 = fmul float %291, %545 %555 = fadd float %553, %554 %556 = fmul float %394, %535 %557 = fmul float %395, %540 %558 = fadd float %557, %556 %559 = fmul float %396, %545 %560 = fadd float %558, %559 %561 = fmul float %476, %476 %562 = fmul float %560, %560 %563 = fadd float %562, %561 %564 = fmul float %401, %401 %565 = fadd float %563, %564 %566 = call float @llvm.AMDGPU.rsq.clamped.f32(float %565) %567 = fmul float %476, %566 %568 = fmul float %560, %566 %569 = fmul float %401, %566 %570 = fmul float %471, %471 %571 = fmul float %555, %555 %572 = fadd float %571, %570 %573 = fmul float %380, %380 %574 = fadd float %572, %573 %575 = call float @llvm.AMDGPU.rsq.clamped.f32(float %574) %576 = fmul float %471, %575 %577 = fmul float %555, %575 %578 = fmul float %380, %575 %579 = fmul float %555, %401 %580 = fmul float %380, %476 %581 = fmul float %471, %560 %582 = fsub float -0.000000e+00, %579 %583 = fmul float %560, %380 %584 = fadd float %583, %582 %585 = fsub float -0.000000e+00, %580 %586 = fmul float %401, %471 %587 = fadd float %586, %585 %588 = fsub float -0.000000e+00, %581 %589 = fmul float %476, %555 %590 = fadd float %589, %588 %591 = fmul float %201, %584 %592 = fmul float %201, %587 %593 = fmul float %201, %590 %594 = fmul float %591, %591 %595 = fmul float %592, %592 %596 = fadd float %595, %594 %597 = fmul float %593, %593 %598 = fadd float %596, %597 %599 = call float @llvm.AMDGPU.rsq.clamped.f32(float %598) %600 = fmul float %591, %599 %601 = fmul float %592, %599 %602 = fmul float %593, %599 %603 = fmul float %382, %451 %604 = fmul float %384, %456 %605 = fadd float %603, %604 %606 = fmul float %386, %461 %607 = fadd float %605, %606 %608 = fmul float %105, %466 %609 = fadd float %607, %608 %610 = fmul float %382, %535 %611 = fmul float %384, %540 %612 = fadd float %610, %611 %613 = fmul float %386, %545 %614 = fadd float %612, %613 %615 = fmul float %105, %550 %616 = fadd float %614, %615 %617 = fsub float -0.000000e+00, %609 %618 = fadd float %617, %39 %619 = fsub float -0.000000e+00, %616 %620 = fadd float %619, %40 %621 = fsub float -0.000000e+00, %393 %622 = fadd float %621, %41 %623 = fmul float %618, %618 %624 = fmul float %620, %620 %625 = fadd float %624, %623 %626 = fmul float %622, %622 %627 = fadd float %625, %626 %628 = call float @llvm.AMDGPU.rsq.clamped.f32(float %627) %629 = fmul float %618, %628 %630 = fmul float %620, %628 %631 = fmul float %622, %628 %632 = fmul float %609, %85 %633 = fmul float %616, %86 %634 = fadd float %632, %633 %635 = fmul float %393, %87 %636 = fadd float %634, %635 %637 = fmul float %14, %88 %638 = fadd float %636, %637 %639 = fmul float %609, %73 %640 = fmul float %616, %74 %641 = fadd float %639, %640 %642 = fmul float %393, %75 %643 = fadd float %641, %642 %644 = fmul float %14, %76 %645 = fadd float %643, %644 %646 = fmul float %609, %77 %647 = fmul float %616, %78 %648 = fadd float %646, %647 %649 = fmul float %393, %79 %650 = fadd float %648, %649 %651 = fmul float %14, %80 %652 = fadd float %650, %651 %653 = fmul float %609, %81 %654 = fmul float %616, %82 %655 = fadd float %653, %654 %656 = fmul float %393, %83 %657 = fadd float %655, %656 %658 = fmul float %14, %84 %659 = fadd float %657, %658 %660 = fdiv float 1.000000e+00, %638 %661 = fmul float %645, %660 %662 = fmul float %652, %660 %663 = fmul float %659, %660 %664 = fmul float %609, %19 %665 = fmul float %616, %20 %666 = fadd float %664, %665 %667 = fmul float %393, %21 %668 = fadd float %666, %667 %669 = fmul float %14, %22 %670 = fadd float %668, %669 %671 = fmul float %609, %23 %672 = fmul float %616, %24 %673 = fadd float %671, %672 %674 = fmul float %393, %25 %675 = fadd float %673, %674 %676 = fmul float %14, %26 %677 = fadd float %675, %676 %678 = fmul float %609, %31 %679 = fmul float %616, %32 %680 = fadd float %678, %679 %681 = fmul float %393, %33 %682 = fadd float %680, %681 %683 = fmul float %14, %34 %684 = fadd float %682, %683 %685 = fmul float %609, %27 %686 = fmul float %616, %28 %687 = fadd float %685, %686 %688 = fmul float %393, %29 %689 = fadd float %687, %688 %690 = fmul float %14, %30 %691 = fadd float %689, %690 %692 = fmul float %609, %35 %693 = fmul float %616, %36 %694 = fadd float %692, %693 %695 = fmul float %393, %37 %696 = fadd float %694, %695 %697 = fmul float %14, %38 %698 = fadd float %696, %697 %699 = fmul float %131, %49 %700 = fmul float %132, %50 %701 = fadd float %699, %700 %702 = fmul float %133, %51 %703 = fadd float %701, %702 %704 = fmul float %134, %52 %705 = fadd float %703, %704 %706 = fmul float %131, %53 %707 = fmul float %132, %54 %708 = fadd float %706, %707 %709 = fmul float %133, %55 %710 = fadd float %708, %709 %711 = fmul float %134, %56 %712 = fadd float %710, %711 %713 = fmul float %131, %65 %714 = fmul float %132, %66 %715 = fadd float %713, %714 %716 = fmul float %133, %67 %717 = fadd float %715, %716 %718 = fmul float %134, %68 %719 = fadd float %717, %718 %720 = fmul float %131, %69 %721 = fmul float %132, %70 %722 = fadd float %720, %721 %723 = fmul float %133, %71 %724 = fadd float %722, %723 %725 = fmul float %134, %72 %726 = fadd float %724, %725 %727 = fmul float %131, %57 %728 = fmul float %132, %58 %729 = fadd float %727, %728 %730 = fmul float %133, %59 %731 = fadd float %729, %730 %732 = fmul float %134, %60 %733 = fadd float %731, %732 %734 = fmul float %131, %61 %735 = fmul float %132, %62 %736 = fadd float %734, %735 %737 = fmul float %133, %63 %738 = fadd float %736, %737 %739 = fmul float %134, %64 %740 = fadd float %738, %739 %741 = fdiv float 1.000000e+00, %684 %742 = fmul float %677, %741 %743 = fmul float %670, %741 %744 = fmul float %42, %89 %745 = fmul float %43, %90 %746 = fmul float %44, %91 %747 = bitcast float %95 to i32 %748 = icmp ne i32 %747, 0 %749 = bitcast float %96 to i32 %750 = icmp ne i32 %749, 0 %751 = bitcast float %97 to i32 %752 = icmp ne i32 %751, 0 %753 = fsub float -0.000000e+00, %609 %754 = fadd float %753, %16 %755 = fsub float -0.000000e+00, %616 %756 = fadd float %755, %17 %757 = fsub float -0.000000e+00, %393 %758 = fadd float %757, %18 %759 = fmul float %754, %754 %760 = fmul float %756, %756 %761 = fadd float %760, %759 %762 = fmul float %758, %758 %763 = fadd float %761, %762 %764 = call float @llvm.AMDGPU.rsq.clamped.f32(float %763) %765 = fmul float %764, %763 %766 = fsub float -0.000000e+00, %763 %767 = call float @llvm.AMDGPU.cndlt(float %766, float %765, float 0.000000e+00) %768 = fmul float %767, %48 %769 = fadd float %768, %46 %770 = call float @llvm.AMDIL.clamp.(float %769, float 0.000000e+00, float 1.000000e+00) %771 = call float @llvm.minnum.f32(float %770, float %47) %772 = fsub float -0.000000e+00, %684 %773 = fmul float %691, %15 %774 = fadd float %773, %772 %775 = fsub float -0.000000e+00, %677 %776 = fmul float %93, %684 %777 = fadd float %776, %670 %778 = fmul float %94, %684 %779 = fadd float %778, %775 %780 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 16 %781 = load <16 x i8> addrspace(2)* %780, !tbaa !0 %782 = call float @llvm.SI.load.const(<16 x i8> %781, i32 0) %783 = fmul float %782, %670 %784 = call float @llvm.SI.load.const(<16 x i8> %781, i32 4) %785 = fmul float %784, %677 %786 = fadd float %783, %785 %787 = call float @llvm.SI.load.const(<16 x i8> %781, i32 8) %788 = fmul float %787, %691 %789 = fadd float %786, %788 %790 = call float @llvm.SI.load.const(<16 x i8> %781, i32 12) %791 = fmul float %790, %684 %792 = fadd float %789, %791 %793 = call float @llvm.SI.load.const(<16 x i8> %781, i32 16) %794 = fmul float %793, %670 %795 = call float @llvm.SI.load.const(<16 x i8> %781, i32 20) %796 = fmul float %795, %677 %797 = fadd float %794, %796 %798 = call float @llvm.SI.load.const(<16 x i8> %781, i32 24) %799 = fmul float %798, %691 %800 = fadd float %797, %799 %801 = call float @llvm.SI.load.const(<16 x i8> %781, i32 28) %802 = fmul float %801, %684 %803 = fadd float %800, %802 %804 = call float @llvm.SI.load.const(<16 x i8> %781, i32 32) %805 = fmul float %804, %670 %806 = call float @llvm.SI.load.const(<16 x i8> %781, i32 36) %807 = fmul float %806, %677 %808 = fadd float %805, %807 %809 = call float @llvm.SI.load.const(<16 x i8> %781, i32 40) %810 = fmul float %809, %691 %811 = fadd float %808, %810 %812 = call float @llvm.SI.load.const(<16 x i8> %781, i32 44) %813 = fmul float %812, %684 %814 = fadd float %811, %813 %815 = call float @llvm.SI.load.const(<16 x i8> %781, i32 48) %816 = fmul float %815, %670 %817 = call float @llvm.SI.load.const(<16 x i8> %781, i32 52) %818 = fmul float %817, %677 %819 = fadd float %816, %818 %820 = call float @llvm.SI.load.const(<16 x i8> %781, i32 56) %821 = fmul float %820, %691 %822 = fadd float %819, %821 %823 = call float @llvm.SI.load.const(<16 x i8> %781, i32 60) %824 = fmul float %823, %684 %825 = fadd float %822, %824 %826 = call float @llvm.SI.load.const(<16 x i8> %781, i32 64) %827 = fmul float %826, %670 %828 = call float @llvm.SI.load.const(<16 x i8> %781, i32 68) %829 = fmul float %828, %677 %830 = fadd float %827, %829 %831 = call float @llvm.SI.load.const(<16 x i8> %781, i32 72) %832 = fmul float %831, %691 %833 = fadd float %830, %832 %834 = call float @llvm.SI.load.const(<16 x i8> %781, i32 76) %835 = fmul float %834, %684 %836 = fadd float %833, %835 %837 = call float @llvm.SI.load.const(<16 x i8> %781, i32 80) %838 = fmul float %837, %670 %839 = call float @llvm.SI.load.const(<16 x i8> %781, i32 84) %840 = fmul float %839, %677 %841 = fadd float %838, %840 %842 = call float @llvm.SI.load.const(<16 x i8> %781, i32 88) %843 = fmul float %842, %691 %844 = fadd float %841, %843 %845 = call float @llvm.SI.load.const(<16 x i8> %781, i32 92) %846 = fmul float %845, %684 %847 = fadd float %844, %846 %848 = call float @llvm.SI.load.const(<16 x i8> %781, i32 96) %849 = fmul float %848, %670 %850 = call float @llvm.SI.load.const(<16 x i8> %781, i32 100) %851 = fmul float %850, %677 %852 = fadd float %849, %851 %853 = call float @llvm.SI.load.const(<16 x i8> %781, i32 104) %854 = fmul float %853, %691 %855 = fadd float %852, %854 %856 = call float @llvm.SI.load.const(<16 x i8> %781, i32 108) %857 = fmul float %856, %684 %858 = fadd float %855, %857 %859 = call float @llvm.SI.load.const(<16 x i8> %781, i32 112) %860 = fmul float %859, %670 %861 = call float @llvm.SI.load.const(<16 x i8> %781, i32 116) %862 = fmul float %861, %677 %863 = fadd float %860, %862 %864 = call float @llvm.SI.load.const(<16 x i8> %781, i32 120) %865 = fmul float %864, %691 %866 = fadd float %863, %865 %867 = call float @llvm.SI.load.const(<16 x i8> %781, i32 124) %868 = fmul float %867, %684 %869 = fadd float %866, %868 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %705, float %712, float %719, float %726) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %45, float %45, float %45, float %733) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %629, float %630, float %631, float %740) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %744, float %745, float %746, float %771) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %609, float %616, float %393, float %698) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %576, float %577, float %578, float %743) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %600, float %601, float %602, float %742) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %567, float %568, float %569, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %661, float %662, float %663, float %638) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %777, float %779, float %774, float %684) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %792, float %803, float %814, float %825) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %836, float %847, float %858, float %869) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[24:27], s[8:9], 0x0 ; C08C0900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C s_load_dwordx4 s[4:7], s[8:9], 0x10 ; C0820910 s_load_dwordx4 s[28:31], s[8:9], 0x14 ; C08E0914 s_load_dwordx4 s[8:11], s[8:9], 0x18 ; C0840918 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s0, s[4:7], 0xd5 ; C20005D5 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s0, v2 ; 100A0400 s_buffer_load_dword s0, s[4:7], 0xd4 ; C20005D4 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s0, v5 ; D2820005 04140101 s_buffer_load_dword s0, s[4:7], 0xd6 ; C20005D6 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s0, v5 ; D2820005 04140103 s_buffer_load_dword s0, s[4:7], 0xd7 ; C20005D7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s0, v5 ; D2820005 04140104 s_buffer_load_dword s0, s[4:7], 0xd1 ; C20005D1 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s0, v2 ; 100C0400 s_buffer_load_dword s0, s[4:7], 0xd0 ; C20005D0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s0, v6 ; D2820006 04180101 s_buffer_load_dword s0, s[4:7], 0xd2 ; C20005D2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s0, v6 ; D2820006 04180103 s_buffer_load_dword s0, s[4:7], 0xd3 ; C20005D3 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s0, v6 ; D2820006 04180104 s_buffer_load_dword s0, s[4:7], 0xc5 ; C20005C5 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v2 ; 100E0400 s_buffer_load_dword s0, s[4:7], 0xc4 ; C20005C4 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v1, s0, v7 ; D2820007 041C0101 s_buffer_load_dword s0, s[4:7], 0xc6 ; C20005C6 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v3, s0, v7 ; D2820007 041C0103 s_buffer_load_dword s0, s[4:7], 0xc7 ; C20005C7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v4, s0, v7 ; D2820007 041C0104 s_buffer_load_dword s0, s[4:7], 0xc1 ; C20005C1 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s0, v2 ; 10100400 s_buffer_load_dword s0, s[4:7], 0xc0 ; C20005C0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v1, s0, v8 ; D2820008 04200101 s_buffer_load_dword s0, s[4:7], 0xc2 ; C20005C2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v3, s0, v8 ; D2820008 04200103 s_buffer_load_dword s0, s[4:7], 0xc3 ; C20005C3 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v4, s0, v8 ; D2820008 04200104 exp 15, 32, 0, 0, 0, v8, v7, v6, v5 ; F800020F 05060708 s_buffer_load_dword s0, s[4:7], 0xc9 ; C20005C9 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v5, s0, v2 ; 100A0400 s_buffer_load_dword s0, s[4:7], 0xc8 ; C20005C8 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s0, v5 ; D2820005 04140101 s_buffer_load_dword s0, s[4:7], 0xca ; C20005CA s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s0, v5 ; D2820005 04140103 s_buffer_load_dword s0, s[4:7], 0xcb ; C20005CB s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s0, v5 ; D2820005 04140104 s_buffer_load_dword s0, s[4:7], 0x3f ; C200053F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mov_b32_e32 v7, s0 ; 7E0E0200 v_mov_b32_e32 v8, s0 ; 7E100200 exp 15, 33, 0, 0, 0, v6, v7, v8, v5 ; F800021F 05080706 s_movk_i32 s0, 0x420 ; B0000420 s_buffer_load_dword s1, s[4:7], s0 ; C2008400 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[5:8], v0, s[24:27], 0 idxen ; E00C2000 80060500 buffer_load_format_xyzw v[9:12], v0, s[28:31], 0 idxen ; E00C2000 80070900 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v13, v10, s1, v6 ; D282000D 0418030A buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 s_buffer_load_dword s0, s[4:7], 0x1 ; C2000501 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v18, s0, v15 ; 06241E00 v_mov_b32_e32 v19, 0x38000000 ; 7E2602FF 38000000 v_mul_f32_e32 v20, 0x38000000, v18 ; 102824FF 38000000 buffer_load_format_xyzw v[21:24], v0, s[16:19], 0 idxen ; E00C2000 80041500 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v25, 0x443f4060, v22 ; 10322CFF 443F4060 v_cvt_i32_f32_e32 v25, v25 ; 7E321119 v_lshlrev_b32_e32 v25, 4, v25 ; 34323284 s_movk_i32 s16, 0x444 ; B0100444 v_add_i32_e32 v26, s16, v25 ; 4A343210 buffer_load_dword v26, v26, s[4:7], 0 offen ; E0301000 80011A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v26, v26, v20 ; 1034291A v_add_f32_e32 v14, s0, v14 ; 061C1C00 v_mul_f32_e32 v14, 0x38000000, v14 ; 101C1CFF 38000000 v_mul_f32_e32 v15, 0x443f4060, v23 ; 101E2EFF 443F4060 v_cvt_i32_f32_e32 v15, v15 ; 7E1E110F v_lshlrev_b32_e32 v15, 4, v15 ; 341E1E84 v_add_i32_e32 v16, s16, v15 ; 4A201E10 buffer_load_dword v16, v16, s[4:7], 0 offen ; E0301000 80011010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v16, v14, v26 ; D2820010 046A1D10 v_mad_f32 v17, v18, v19, v14 ; D2820011 043A2712 v_sub_f32_e32 v17, s0, v17 ; 08222200 v_mul_f32_e32 v18, 0x443f4060, v21 ; 10242AFF 443F4060 v_cvt_i32_f32_e32 v18, v18 ; 7E241112 v_lshlrev_b32_e32 v18, 4, v18 ; 34242484 v_add_i32_e32 v19, s16, v18 ; 4A262410 buffer_load_dword v19, v19, s[4:7], 0 offen ; E0301000 80011313 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v19, v17, v16 ; D2820010 04422313 v_mul_f32_e32 v19, v16, v13 ; 10261B10 v_mad_f32 v21, v9, s1, v5 ; D2820015 04140309 s_movk_i32 s16, 0x440 ; B0100440 v_add_i32_e32 v22, s16, v25 ; 4A2C3210 buffer_load_dword v22, v22, s[4:7], 0 offen ; E0301000 80011616 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v22, v22, v20 ; 102C2916 v_add_i32_e32 v23, s16, v15 ; 4A2E1E10 buffer_load_dword v23, v23, s[4:7], 0 offen ; E0301000 80011717 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, v23, v14, v22 ; D2820016 045A1D17 v_add_i32_e32 v23, s16, v18 ; 4A2E2410 buffer_load_dword v23, v23, s[4:7], 0 offen ; E0301000 80011717 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, v23, v17, v22 ; D2820016 045A2317 v_mad_f32 v19, v21, v22, v19 ; D2820013 044E2D15 v_mad_f32 v9, v11, s1, v7 ; D2820009 041C030B s_movk_i32 s16, 0x448 ; B0100448 v_add_i32_e32 v10, s16, v25 ; 4A143210 buffer_load_dword v10, v10, s[4:7], 0 offen ; E0301000 80010A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v10, v20 ; 1014290A v_add_i32_e32 v11, s16, v15 ; 4A161E10 buffer_load_dword v11, v11, s[4:7], 0 offen ; E0301000 80010B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v11, v14, v10 ; D282000A 042A1D0B v_add_i32_e32 v11, s16, v18 ; 4A162410 buffer_load_dword v11, v11, s[4:7], 0 offen ; E0301000 80010B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v11, v17, v10 ; D282000A 042A230B v_mad_f32 v11, v9, v10, v19 ; D282000B 044E1509 s_movk_i32 s16, 0x44c ; B010044C v_add_i32_e32 v12, s16, v25 ; 4A183210 buffer_load_dword v12, v12, s[4:7], 0 offen ; E0301000 80010C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v12, v20 ; 1018290C v_add_i32_e32 v19, s16, v15 ; 4A261E10 buffer_load_dword v19, v19, s[4:7], 0 offen ; E0301000 80011313 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v19, v14, v12 ; D282000C 04321D13 v_add_i32_e32 v19, s16, v18 ; 4A262410 buffer_load_dword v19, v19, s[4:7], 0 offen ; E0301000 80011313 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v19, v17, v12 ; D282000C 04322313 v_mad_f32 v11, v8, v12, v11 ; D282000B 042E1908 s_buffer_load_dword s16, s[4:7], 0x39 ; C2080539 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v12, s16, v11 ; 08181610 s_movk_i32 s16, 0x434 ; B0100434 v_add_i32_e32 v19, s16, v25 ; 4A263210 buffer_load_dword v19, v19, s[4:7], 0 offen ; E0301000 80011313 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v19, v19, v20 ; 10262913 v_add_i32_e32 v23, s16, v15 ; 4A2E1E10 buffer_load_dword v23, v23, s[4:7], 0 offen ; E0301000 80011717 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, v23, v14, v19 ; D2820013 044E1D17 v_add_i32_e32 v23, s16, v18 ; 4A2E2410 buffer_load_dword v23, v23, s[4:7], 0 offen ; E0301000 80011717 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, v23, v17, v19 ; D2820013 044E2317 v_mul_f32_e32 v23, v19, v13 ; 102E1B13 s_movk_i32 s16, 0x430 ; B0100430 v_add_i32_e32 v24, s16, v25 ; 4A303210 buffer_load_dword v24, v24, s[4:7], 0 offen ; E0301000 80011818 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v24, v24, v20 ; 10302918 v_add_i32_e32 v26, s16, v15 ; 4A341E10 buffer_load_dword v26, v26, s[4:7], 0 offen ; E0301000 80011A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v24, v26, v14, v24 ; D2820018 04621D1A v_add_i32_e32 v26, s16, v18 ; 4A342410 buffer_load_dword v26, v26, s[4:7], 0 offen ; E0301000 80011A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v24, v26, v17, v24 ; D2820018 0462231A v_mad_f32 v23, v21, v24, v23 ; D2820017 045E3115 s_movk_i32 s16, 0x438 ; B0100438 v_add_i32_e32 v26, s16, v25 ; 4A343210 buffer_load_dword v26, v26, s[4:7], 0 offen ; E0301000 80011A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v26, v26, v20 ; 1034291A v_add_i32_e32 v27, s16, v15 ; 4A361E10 buffer_load_dword v27, v27, s[4:7], 0 offen ; E0301000 80011B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, v27, v14, v26 ; D282001A 046A1D1B v_add_i32_e32 v27, s16, v18 ; 4A362410 buffer_load_dword v27, v27, s[4:7], 0 offen ; E0301000 80011B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, v27, v17, v26 ; D282001A 046A231B v_mad_f32 v23, v9, v26, v23 ; D2820017 045E3509 s_movk_i32 s16, 0x43c ; B010043C v_add_i32_e32 v27, s16, v25 ; 4A363210 buffer_load_dword v27, v27, s[4:7], 0 offen ; E0301000 80011B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v27, v27, v20 ; 1036291B v_add_i32_e32 v28, s16, v15 ; 4A381E10 buffer_load_dword v28, v28, s[4:7], 0 offen ; E0301000 80011C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, v28, v14, v27 ; D282001B 046E1D1C v_add_i32_e32 v28, s16, v18 ; 4A382410 buffer_load_dword v28, v28, s[4:7], 0 offen ; E0301000 80011C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, v28, v17, v27 ; D282001B 046E231C v_mad_f32 v23, v8, v27, v23 ; D2820017 045E3708 s_buffer_load_dword s16, s[4:7], 0x38 ; C2080538 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v27, s16, v23 ; 08362E10 v_mul_f32_e32 v28, v27, v27 ; 1038371B v_mad_f32 v28, v12, v12, v28 ; D282001C 0472190C s_movk_i32 s16, 0x454 ; B0100454 v_add_i32_e32 v29, s16, v25 ; 4A3A3210 buffer_load_dword v29, v29, s[4:7], 0 offen ; E0301000 80011D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v29, v29, v20 ; 103A291D v_add_i32_e32 v30, s16, v15 ; 4A3C1E10 buffer_load_dword v30, v30, s[4:7], 0 offen ; E0301000 80011E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v29, v30, v14, v29 ; D282001D 04761D1E v_add_i32_e32 v30, s16, v18 ; 4A3C2410 buffer_load_dword v30, v30, s[4:7], 0 offen ; E0301000 80011E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v29, v30, v17, v29 ; D282001D 0476231E v_mul_f32_e32 v13, v29, v13 ; 101A1B1D s_movk_i32 s16, 0x450 ; B0100450 v_add_i32_e32 v30, s16, v25 ; 4A3C3210 buffer_load_dword v30, v30, s[4:7], 0 offen ; E0301000 80011E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v30, v30, v20 ; 103C291E v_add_i32_e32 v31, s16, v15 ; 4A3E1E10 buffer_load_dword v31, v31, s[4:7], 0 offen ; E0301000 80011F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v30, v31, v14, v30 ; D282001E 047A1D1F v_add_i32_e32 v31, s16, v18 ; 4A3E2410 buffer_load_dword v31, v31, s[4:7], 0 offen ; E0301000 80011F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v30, v31, v17, v30 ; D282001E 047A231F v_mad_f32 v13, v21, v30, v13 ; D282000D 04363D15 s_movk_i32 s16, 0x458 ; B0100458 v_add_i32_e32 v21, s16, v25 ; 4A2A3210 buffer_load_dword v21, v21, s[4:7], 0 offen ; E0301000 80011515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v20 ; 102A2915 v_add_i32_e32 v31, s16, v15 ; 4A3E1E10 buffer_load_dword v31, v31, s[4:7], 0 offen ; E0301000 80011F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v21, v31, v14, v21 ; D2820015 04561D1F v_add_i32_e32 v31, s16, v18 ; 4A3E2410 buffer_load_dword v31, v31, s[4:7], 0 offen ; E0301000 80011F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v21, v31, v17, v21 ; D2820015 0456231F v_mad_f32 v9, v9, v21, v13 ; D2820009 04362B09 s_movk_i32 s16, 0x45c ; B010045C v_add_i32_e32 v13, s16, v25 ; 4A1A3210 buffer_load_dword v13, v13, s[4:7], 0 offen ; E0301000 80010D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, v13, v20 ; 101A290D v_add_i32_e32 v15, s16, v15 ; 4A1E1E10 buffer_load_dword v15, v15, s[4:7], 0 offen ; E0301000 80010F0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v13, v15, v14, v13 ; D282000D 04361D0F v_add_i32_e32 v14, s16, v18 ; 4A1C2410 buffer_load_dword v14, v14, s[4:7], 0 offen ; E0301000 80010E0E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v13, v14, v17, v13 ; D282000D 0436230E v_mad_f32 v5, v8, v13, v9 ; D2820005 04261B08 s_buffer_load_dword s16, s[4:7], 0x3a ; C208053A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v6, s16, v5 ; 080C0A10 v_mad_f32 v7, v6, v6, v28 ; D2820007 04720D06 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mul_f32_e32 v8, v7, v12 ; 10101907 v_mul_f32_e32 v7, v7, v27 ; 100E3707 s_buffer_load_dword s16, s[4:7], 0xcd ; C20805CD s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s16, v2 ; 10120410 s_buffer_load_dword s16, s[4:7], 0xcc ; C20805CC s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v1, s16, v9 ; D2820009 04242101 s_buffer_load_dword s16, s[4:7], 0xce ; C20805CE s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v3, s16, v9 ; D2820009 04242103 s_buffer_load_dword s16, s[4:7], 0xcf ; C20805CF s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s16, v9 ; D2820001 04242104 exp 15, 34, 0, 0, 0, v7, v8, v6, v1 ; F800022F 01060807 s_buffer_load_dword s16, s[4:7], 0x9 ; C2080509 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_sub_f32_e32 v1, s16, v11 ; 08021610 s_buffer_load_dword s16, s[4:7], 0x8 ; C2080508 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v2, s16, v23 ; 08042E10 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mad_f32 v1, v1, v1, v2 ; D2820001 040A0301 s_buffer_load_dword s16, s[4:7], 0xa ; C208050A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v2, s16, v5 ; 08040A10 v_mad_f32 v1, v2, v2, v1 ; D2820001 04060502 v_rsq_clamp_f32_e32 v2, v1 ; 7E045901 v_mul_f32_e32 v2, v1, v2 ; 10040501 v_xor_b32_e32 v1, 0x80000000, v1 ; 3A0202FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_mov_b32_e32 v1, 0 ; 7E020280 v_cndmask_b32_e64 v2, 0, v2, vcc ; D2000002 01AA0480 s_buffer_load_dword s16, s[4:7], 0x40 ; C2080540 s_buffer_load_dword s17, s[4:7], 0x43 ; C2088543 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s16 ; 7E060210 v_mad_f32 v2, s17, v2, v3 ; D2820002 040E0411 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 s_buffer_load_dword s16, s[4:7], 0x42 ; C2080542 s_waitcnt lgkmcnt(0) ; BF8C007F v_min_f32_e32 v2, s16, v2 ; 1E040410 s_buffer_load_dword s16, s[4:7], 0xea ; C20805EA s_buffer_load_dword s17, s[4:7], 0x3e ; C208853E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s16 ; 7E060210 v_mul_f32_e32 v3, s17, v3 ; 10060611 s_buffer_load_dword s16, s[4:7], 0xe9 ; C20805E9 s_buffer_load_dword s17, s[4:7], 0x3d ; C208853D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s16 ; 7E080210 v_mul_f32_e32 v4, s17, v4 ; 10080811 s_buffer_load_dword s16, s[4:7], 0xe8 ; C20805E8 s_buffer_load_dword s17, s[4:7], 0x3c ; C208853C s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s16 ; 7E0C0210 v_mul_f32_e32 v6, s17, v6 ; 100C0C11 exp 15, 35, 0, 0, 0, v6, v4, v3, v2 ; F800023F 02030406 s_buffer_load_dword s16, s[4:7], 0x35 ; C2080535 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s16, v11 ; 10041610 s_buffer_load_dword s16, s[4:7], 0x34 ; C2080534 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v23, s16, v2 ; D2820002 04082117 s_buffer_load_dword s16, s[4:7], 0x36 ; C2080536 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v5, s16, v2 ; D2820002 04082105 s_buffer_load_dword s16, s[4:7], 0x37 ; C2080537 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s16 ; 7E060210 v_mad_f32 v2, v3, s0, v2 ; D2820002 04080103 exp 15, 36, 0, 0, 0, v23, v11, v5, v2 ; F800024F 02050B17 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v2, 0xc3000000 ; 7E0402FF C3000000 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v3, v2, v8 ; 06061102 s_buffer_load_dword s12, s[4:7], 0x0 ; C2060500 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[14:15], v3, s12 ; D002000E 00001903 v_cndmask_b32_e64 v4, 0, -1, s[14:15] ; D2000004 00398280 v_and_b32_e32 v4, 1.0, v4 ; 360808F2 v_sub_f32_e64 v3, |v3|, v4 ; D2080103 00020903 v_mov_b32_e32 v12, 0xc2800000 ; 7E1802FF C2800000 v_add_f32_e32 v3, v12, v3 ; 0606070C v_cmp_lt_f32_e64 s[14:15], v3, s12 ; D002000E 00001903 v_cndmask_b32_e64 v13, 0, -1, s[14:15] ; D200080D 00398280 v_and_b32_e32 v13, 1.0, v13 ; 361A1AF2 v_sub_f32_e64 v3, |v3|, v13 ; D2080103 00021B03 v_mov_b32_e32 v14, 0xbc820821 ; 7E1C02FF BC820821 v_mad_f32 v15, v3, v14, 1.0 ; D282000F 03CA1D03 v_add_f32_e32 v17, v2, v9 ; 06221302 v_cmp_lt_f32_e64 s[14:15], v17, s12 ; D002000E 00001911 v_cndmask_b32_e64 v18, 0, -1, s[14:15] ; D2000012 00398280 v_and_b32_e32 v18, 1.0, v18 ; 362424F2 v_sub_f32_e64 v17, |v17|, v18 ; D2080111 00022511 v_add_f32_e32 v17, v12, v17 ; 0622230C v_cmp_lt_f32_e64 s[14:15], v17, s12 ; D002000E 00001911 v_cndmask_b32_e64 v20, 0, -1, s[14:15] ; D2000014 00398280 v_and_b32_e32 v20, 1.0, v20 ; 362828F2 v_sub_f32_e64 v17, |v17|, v20 ; D2080111 00022911 v_mad_f32 v15, v17, v14, v15 ; D282000F 043E1D11 v_mul_f32_e32 v17, 0x3c820821, v17 ; 102222FF 3C820821 v_mul_f32_e32 v3, 0x3c820821, v3 ; 100606FF 3C820821 v_mul_f32_e32 v25, v3, v3 ; 10320703 v_mad_f32 v25, v17, v17, v25 ; D2820019 04662311 v_mad_f32 v25, v15, v15, v25 ; D2820019 04661F0F v_rsq_clamp_f32_e32 v25, v25 ; 7E325919 v_mul_f32_e32 v17, v25, v17 ; 10222319 s_buffer_load_dword s13, s[4:7], 0x2 ; C2068502 v_mov_b32_e32 v27, s0 ; 7E360200 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v20, -v20, s13, v27 ; D2820014 246C1B14 v_mul_f32_e32 v17, v17, v20 ; 10222911 buffer_load_format_xyzw v[31:34], v0, s[8:11], 0 idxen ; E00C2000 80021F00 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v32, s1, v17 ; D2820000 04440320 v_mul_f32_e32 v3, v25, v3 ; 10060719 v_mov_b32_e32 v17, s0 ; 7E220200 v_mad_f32 v13, -v13, s13, v17 ; D282000D 24441B0D v_mul_f32_e32 v3, v3, v13 ; 10061B03 v_mad_f32 v3, v31, s1, v3 ; D2820003 040C031F v_mul_f32_e32 v13, v22, v3 ; 101A0716 v_mad_f32 v13, v0, v16, v13 ; D282000D 04362100 v_mul_f32_e32 v15, v25, v15 ; 101E1F19 v_mov_b32_e32 v17, s0 ; 7E220200 v_mad_f32 v4, -v4, s13, v17 ; D2820004 24441B04 v_mul_f32_e32 v4, v15, v4 ; 1008090F v_mad_f32 v4, v33, s1, v4 ; D2820004 04100321 v_mad_f32 v13, v4, v10, v13 ; D282000D 04361504 v_mul_f32_e32 v15, v24, v3 ; 101E0718 v_mad_f32 v15, v0, v19, v15 ; D282000F 043E2700 v_mad_f32 v15, v4, v26, v15 ; D282000F 043E3504 v_mul_f32_e32 v17, v15, v15 ; 10221F0F v_mad_f32 v17, v13, v13, v17 ; D2820011 04461B0D v_mul_f32_e32 v3, v30, v3 ; 1006071E v_mad_f32 v0, v0, v29, v3 ; D2820000 040E3B00 v_mad_f32 v0, v4, v21, v0 ; D2820000 04022B04 v_mad_f32 v3, v0, v0, v17 ; D2820003 04460100 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mul_f32_e32 v4, v3, v0 ; 10080103 v_mul_f32_e32 v17, v3, v13 ; 10221B03 v_mul_f32_e32 v3, v3, v15 ; 10061F03 s_buffer_load_dword s8, s[4:7], 0x21 ; C2040521 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s8, v11 ; 10281608 s_buffer_load_dword s8, s[4:7], 0x20 ; C2040520 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v20, v23, s8, v20 ; D2820014 04501117 s_buffer_load_dword s8, s[4:7], 0x22 ; C2040522 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v20, v5, s8, v20 ; D2820014 04501105 s_buffer_load_dword s8, s[4:7], 0x23 ; C2040523 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v25, s8 ; 7E320208 v_mad_f32 v20, v25, s0, v20 ; D2820014 04500119 s_buffer_load_dword s8, s[4:7], 0x2d ; C204052D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v25, s8, v11 ; 10321608 s_buffer_load_dword s8, s[4:7], 0x2c ; C204052C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v25, v23, s8, v25 ; D2820019 04641117 s_buffer_load_dword s8, s[4:7], 0x2e ; C204052E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v25, v5, s8, v25 ; D2820019 04641105 s_buffer_load_dword s8, s[4:7], 0x2f ; C204052F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v27, s8 ; 7E360208 v_mad_f32 v25, v27, s0, v25 ; D2820019 0464011B v_rcp_f32_e32 v27, v25 ; 7E365519 v_mul_f32_e32 v28, v27, v20 ; 1038291B exp 15, 37, 0, 0, 0, v3, v17, v4, v28 ; F800025F 1C041103 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v3, v2, v6 ; 06060D02 v_cmp_lt_f32_e64 s[8:9], v3, s12 ; D0020008 00001903 v_cndmask_b32_e64 v4, 0, -1, s[8:9] ; D2000004 00218280 v_and_b32_e32 v4, 1.0, v4 ; 360808F2 v_sub_f32_e64 v3, |v3|, v4 ; D2080103 00020903 v_add_f32_e32 v3, v12, v3 ; 0606070C v_cmp_lt_f32_e64 s[8:9], v3, s12 ; D0020008 00001903 v_cndmask_b32_e64 v17, 0, -1, s[8:9] ; D2000811 00218280 v_and_b32_e32 v17, 1.0, v17 ; 362222F2 v_sub_f32_e64 v3, |v3|, v17 ; D2080103 00022303 v_mad_f32 v28, v3, v14, 1.0 ; D282001C 03CA1D03 v_add_f32_e32 v2, v2, v7 ; 06040F02 v_cmp_lt_f32_e64 s[8:9], v2, s12 ; D0020008 00001902 v_cndmask_b32_e64 v6, 0, -1, s[8:9] ; D2000006 00218280 v_and_b32_e32 v6, 1.0, v6 ; 360C0CF2 v_sub_f32_e64 v2, |v2|, v6 ; D2080102 00020D02 v_add_f32_e32 v2, v12, v2 ; 0604050C v_cmp_lt_f32_e64 s[8:9], v2, s12 ; D0020008 00001902 v_cndmask_b32_e64 v6, 0, -1, s[8:9] ; D2000006 00218280 v_and_b32_e32 v6, 1.0, v6 ; 360C0CF2 v_sub_f32_e64 v2, |v2|, v6 ; D2080102 00020D02 v_mad_f32 v7, v2, v14, v28 ; D2820007 04721D02 v_mul_f32_e32 v2, 0x3c820821, v2 ; 100404FF 3C820821 v_mul_f32_e32 v3, 0x3c820821, v3 ; 100606FF 3C820821 v_mul_f32_e32 v8, v3, v3 ; 10100703 v_mad_f32 v8, v2, v2, v8 ; D2820008 04220502 v_mad_f32 v8, v7, v7, v8 ; D2820008 04220F07 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mov_b32_e32 v9, s0 ; 7E120200 v_mad_f32 v6, -v6, s13, v9 ; D2820006 24241B06 v_mul_f32_e32 v2, v2, v6 ; 10040D02 v_mad_f32 v2, v32, s1, v2 ; D2820002 04080320 v_mul_f32_e32 v3, v8, v3 ; 10060708 v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mad_f32 v6, -v17, s13, v6 ; D2820006 24181B11 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mad_f32 v3, v31, s1, v3 ; D2820003 040C031F v_mul_f32_e32 v6, v24, v3 ; 100C0718 v_mad_f32 v6, v2, v19, v6 ; D2820006 041A2702 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mov_b32_e32 v8, s0 ; 7E100200 v_mad_f32 v4, -v4, s13, v8 ; D2820004 24201B04 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mad_f32 v4, v33, s1, v4 ; D2820004 04100321 v_mad_f32 v6, v4, v26, v6 ; D2820006 041A3504 v_mul_f32_e32 v7, v6, v0 ; 100E0106 v_mul_f32_e32 v8, v30, v3 ; 1010071E v_mad_f32 v8, v2, v29, v8 ; D2820008 04223B02 v_mad_f32 v8, v4, v21, v8 ; D2820008 04222B04 v_mad_f32 v7, v8, v15, -v7 ; D2820007 841E1F08 v_mov_b32_e32 v9, s0 ; 7E120200 v_mad_f32 v9, -v18, s13, v9 ; D2820009 24241B12 v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mul_f32_e32 v12, v8, v13 ; 10181B08 v_mul_f32_e32 v3, v22, v3 ; 10060716 v_mad_f32 v2, v2, v16, v3 ; D2820002 040E2102 v_mad_f32 v2, v4, v10, v2 ; D2820002 040A1504 v_mad_f32 v0, v2, v0, -v12 ; D2820000 84320102 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mul_f32_e32 v3, v0, v0 ; 10060100 v_mad_f32 v3, v7, v7, v3 ; D2820003 040E0F07 v_mul_f32_e32 v4, v2, v15 ; 10081F02 v_mad_f32 v4, v6, v13, -v4 ; D2820004 84121B06 v_mul_f32_e32 v4, v4, v9 ; 10081304 v_mad_f32 v3, v4, v4, v3 ; D2820003 040E0904 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mul_f32_e32 v4, v3, v4 ; 10080903 v_mul_f32_e32 v7, v3, v7 ; 100E0F03 v_mul_f32_e32 v0, v3, v0 ; 10000103 s_buffer_load_dword s1, s[4:7], 0x25 ; C2008525 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s1, v11 ; 10061601 s_buffer_load_dword s1, s[4:7], 0x24 ; C2008524 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v23, s1, v3 ; D2820003 040C0317 s_buffer_load_dword s1, s[4:7], 0x26 ; C2008526 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v5, s1, v3 ; D2820003 040C0305 s_buffer_load_dword s1, s[4:7], 0x27 ; C2008527 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v9, s1 ; 7E120201 v_mad_f32 v3, v9, s0, v3 ; D2820003 040C0109 v_mul_f32_e32 v9, v27, v3 ; 1012071B exp 15, 38, 0, 0, 0, v0, v7, v4, v9 ; F800026F 09040700 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v6, v6 ; 10000D06 v_mad_f32 v0, v2, v2, v0 ; D2820000 04020502 v_mad_f32 v0, v8, v8, v0 ; D2820000 04021108 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v4, v0, v8 ; 10081100 v_mul_f32_e32 v2, v0, v2 ; 10040500 v_mul_f32_e32 v0, v0, v6 ; 10000D00 exp 15, 39, 0, 0, 0, v0, v2, v4, v1 ; F800027F 01040200 s_buffer_load_dword s1, s[4:7], 0xe1 ; C20085E1 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s1, v11 ; 10001601 s_buffer_load_dword s1, s[4:7], 0xe0 ; C20085E0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v23, s1, v0 ; D2820000 04000317 s_buffer_load_dword s1, s[4:7], 0xe2 ; C20085E2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s1, v0 ; D2820000 04000305 s_buffer_load_dword s1, s[4:7], 0xe3 ; C20085E3 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s1 ; 7E020201 v_mad_f32 v0, v1, s0, v0 ; D2820000 04000101 s_buffer_load_dword s1, s[4:7], 0xe5 ; C20085E5 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s1, v11 ; 10021601 s_buffer_load_dword s1, s[4:7], 0xe4 ; C20085E4 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v23, s1, v1 ; D2820001 04040317 s_buffer_load_dword s1, s[4:7], 0xe6 ; C20085E6 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v5, s1, v1 ; D2820001 04040305 s_buffer_load_dword s1, s[4:7], 0xe7 ; C20085E7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s1 ; 7E040201 v_mad_f32 v1, v2, s0, v1 ; D2820001 04040102 v_rcp_f32_e32 v2, v1 ; 7E045501 v_mul_f32_e32 v0, v2, v0 ; 10000102 s_buffer_load_dword s1, s[4:7], 0xdd ; C20085DD s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s1, v11 ; 10081601 s_buffer_load_dword s1, s[4:7], 0xdc ; C20085DC s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v23, s1, v4 ; D2820004 04100317 s_buffer_load_dword s1, s[4:7], 0xde ; C20085DE s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v5, s1, v4 ; D2820004 04100305 s_buffer_load_dword s1, s[4:7], 0xdf ; C20085DF s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s1 ; 7E0C0201 v_mad_f32 v4, v6, s0, v4 ; D2820004 04100106 v_mul_f32_e32 v4, v2, v4 ; 10080902 s_buffer_load_dword s1, s[4:7], 0xd9 ; C20085D9 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s1, v11 ; 100C1601 s_buffer_load_dword s1, s[4:7], 0xd8 ; C20085D8 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v23, s1, v6 ; D2820006 04180317 s_buffer_load_dword s1, s[4:7], 0xda ; C20085DA s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s1, v6 ; D2820006 04180305 s_buffer_load_dword s1, s[4:7], 0xdb ; C20085DB s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s1 ; 7E0E0201 v_mad_f32 v6, v7, s0, v6 ; D2820006 04180107 v_mul_f32_e32 v2, v2, v6 ; 10040D02 exp 15, 40, 0, 0, 0, v2, v4, v0, v1 ; F800028F 01000402 s_buffer_load_dword s1, s[4:7], 0x29 ; C2008529 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s1, v11 ; 10001601 s_buffer_load_dword s1, s[4:7], 0x28 ; C2008528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v23, s1, v0 ; D2820000 04000317 s_buffer_load_dword s1, s[4:7], 0x2a ; C200852A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s1, v0 ; D2820000 04000305 s_buffer_load_dword s1, s[4:7], 0x2b ; C200852B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s1 ; 7E020201 v_mad_f32 v0, v1, s0, v0 ; D2820000 04000101 v_mad_f32 v1, v0, s13, -v25 ; D2820001 84641B00 s_movk_i32 s0, 0xed4 ; B0000ED4 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s0, v25, -v3 ; D2820002 840E3200 s_movk_i32 s0, 0xed0 ; B0000ED0 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s0, v25, v20 ; D2820004 04523200 exp 15, 12, 0, 0, 0, v4, v2, v1, v25 ; F80000CF 19010204 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v3 ; 10020604 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v20, v1 ; D2820001 04062804 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v0, v1 ; D2820001 04060004 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v25, v1 ; D2820001 04063204 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v3 ; 10040604 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v20, v2 ; D2820002 040A2804 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v0, v2 ; D2820002 040A0004 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v25, v2 ; D2820002 040A3204 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v3 ; 10080604 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v20, v4 ; D2820004 04122804 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v0, v4 ; D2820004 04120004 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v25, v4 ; D2820004 04123204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v3 ; 100A0604 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v20, v5 ; D2820005 04162804 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v25, v5 ; D2820005 04163204 exp 15, 13, 0, 0, 0, v5, v4, v2, v1 ; F80000DF 01020405 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s4, v3 ; 10020604 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v20, v1 ; D2820001 04062804 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v0, v1 ; D2820001 04060004 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v25, v1 ; D2820001 04063204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v3 ; 10040604 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v20, v2 ; D2820002 040A2804 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v0, v2 ; D2820002 040A0004 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v25, v2 ; D2820002 040A3204 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v3 ; 10080604 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v20, v4 ; D2820004 04122804 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v0, v4 ; D2820004 04120004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v25, v4 ; D2820004 04123204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v3 ; 10060604 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v20, v3 ; D2820003 040E2804 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v0, v3 ; D2820000 040E0004 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v25, v0 ; D2820000 04023200 exp 15, 14, 0, 1, 0, v0, v4, v2, v1 ; F80008EF 01020400 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL IN[7], GENERIC[16], PERSPECTIVE DCL IN[8], GENERIC[17], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL CONST[0..30] DCL TEMP[0..15], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0010, -0.0010} IMM[1] FLT32 { 0.0749, 0.1236, 0.2060, -20000.0000} IMM[2] FLT32 { 2.0000, -1.0000, 0.5000, -0.0100} IMM[3] FLT32 { 0.0100, 0.4545, 0.0000, 0.0000} 0: ABS TEMP[0].x, CONST[4].wwww 1: MUL TEMP[1].x, CONST[29].wwww, IN[4].wwww 2: MOV TEMP[2].xy, IN[0].xyyy 3: TEX TEMP[2], TEMP[2], SAMP[0], 2D 4: MOV TEMP[3].xyz, TEMP[2] 5: MUL TEMP[2].x, TEMP[2].wwww, CONST[2].wwww 6: MOV TEMP[4].xyz, TEMP[1] 7: FSGE TEMP[5].x, -TEMP[0].xxxx, IMM[0].xxxx 8: UIF TEMP[5].xxxx :0 9: MOV TEMP[2].x, TEMP[2].xxxx 10: ELSE :0 11: MOV TEMP[2].x, TEMP[1].xxxx 12: ENDIF 13: MOV TEMP[4].w, TEMP[2].xxxx 14: MOV TEMP[1].w, TEMP[4] 15: MUL TEMP[0], IMM[0].yyyx, IN[8].xyzx 16: ADD TEMP[2].xyz, TEMP[0], IMM[0].zzxy 17: MOV TEMP[5].xy, TEMP[2].xyyy 18: MOV TEMP[5].z, TEMP[2].zzzz 19: TEX TEMP[5].x, TEMP[5], SAMP[10], SHADOW2D 20: MOV TEMP[2].x, TEMP[5].xxxx 21: ADD TEMP[5].xyz, TEMP[0], IMM[0].wzxy 22: MOV TEMP[6].xy, TEMP[5].xyyy 23: MOV TEMP[6].z, TEMP[5].zzzz 24: TEX TEMP[6].x, TEMP[6], SAMP[10], SHADOW2D 25: MOV TEMP[2].y, TEMP[6].xxxx 26: ADD TEMP[5].xyz, TEMP[0], IMM[0].zwxy 27: MOV TEMP[6].xy, TEMP[5].xyyy 28: MOV TEMP[6].z, TEMP[5].zzzz 29: TEX TEMP[6].x, TEMP[6], SAMP[10], SHADOW2D 30: MOV TEMP[2].z, TEMP[6].xxxx 31: ADD TEMP[5].xyz, TEMP[0], IMM[0].wwxy 32: MOV TEMP[6].xy, TEMP[5].xyyy 33: MOV TEMP[6].z, TEMP[5].zzzz 34: TEX TEMP[6].x, TEMP[6], SAMP[10], SHADOW2D 35: MOV TEMP[2].w, TEMP[6].xxxx 36: DP4 TEMP[6].x, TEMP[2], IMM[1].xxxx 37: ADD TEMP[2].xyz, TEMP[0], IMM[0].zxxy 38: MOV TEMP[7].xy, TEMP[2].xyyy 39: MOV TEMP[7].z, TEMP[2].zzzz 40: TEX TEMP[7].x, TEMP[7], SAMP[10], SHADOW2D 41: MOV TEMP[2].x, TEMP[7].xxxx 42: ADD TEMP[5].xyz, TEMP[0], IMM[0].wxxy 43: MOV TEMP[7].xy, TEMP[5].xyyy 44: MOV TEMP[7].z, TEMP[5].zzzz 45: TEX TEMP[7].x, TEMP[7], SAMP[10], SHADOW2D 46: MOV TEMP[2].y, TEMP[7].xxxx 47: ADD TEMP[5].xyz, TEMP[0], IMM[0].xwxy 48: MOV TEMP[7].xy, TEMP[5].xyyy 49: MOV TEMP[7].z, TEMP[5].zzzz 50: TEX TEMP[7].x, TEMP[7], SAMP[10], SHADOW2D 51: MOV TEMP[2].z, TEMP[7].xxxx 52: ADD TEMP[5].xyz, TEMP[0], IMM[0].xzxy 53: ADD TEMP[0].xyz, TEMP[0], IMM[0].xxxy 54: MOV TEMP[7].xy, TEMP[5].xyyy 55: MOV TEMP[7].z, TEMP[5].zzzz 56: TEX TEMP[7].x, TEMP[7], SAMP[10], SHADOW2D 57: MOV TEMP[2].w, TEMP[7].xxxx 58: DP4 TEMP[7].x, TEMP[2], IMM[1].yyyy 59: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 60: MOV TEMP[7].xy, TEMP[0].xyyy 61: MOV TEMP[7].z, TEMP[0].zzzz 62: TEX TEMP[7].x, TEMP[7], SAMP[10], SHADOW2D 63: MAD TEMP[0].x, TEMP[7].xxxx, IMM[1].zzzz, TEMP[6].xxxx 64: ADD TEMP[6].x, IMM[1].wwww, IN[4].zzzz 65: RCP TEMP[7].x, CONST[7].zzzz 66: MUL TEMP[6].x, -TEMP[6].xxxx, TEMP[7].xxxx 67: MAD TEMP[6].xy, CONST[7].xyyy, TEMP[6].xxxx, IN[4].xyyy 68: RCP TEMP[7].x, CONST[22].wwww 69: MAD TEMP[2].xy, TEMP[6].xyyy, TEMP[7].xxxx, CONST[6].xyyy 70: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[7].xxxx, CONST[6].zwww 71: MOV TEMP[7].xy, TEMP[2].xyyy 72: TEX TEMP[7].x, TEMP[7], SAMP[9], 2D 73: MOV TEMP[6].xy, TEMP[6].xyyy 74: TEX TEMP[6].y, TEMP[6], SAMP[9], 2D 75: MUL TEMP[8].x, TEMP[7].xxxx, TEMP[6].yyyy 76: MAD TEMP[6].x, TEMP[7].xxxx, -TEMP[6].yyyy, IMM[0].yyyy 77: MUL TEMP[2].xyz, TEMP[8].xxxx, CONST[28].xyzz 78: MUL TEMP[2].xyz, TEMP[0].xxxx, TEMP[2].xyzz 79: ADD TEMP[7].x, -TEMP[0].xxxx, IMM[0].yyyy 80: MAX TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx 81: MUL TEMP[6].xyz, TEMP[6].xxxx, CONST[21].xyzz 82: MUL TEMP[6].xyz, TEMP[6].xyzz, IN[1].xxxx 83: MOV TEMP[7].xy, IN[0].xyyy 84: TEX TEMP[7].xyz, TEMP[7], SAMP[1], 2D 85: MAD TEMP[5].xyz, TEMP[7].xyzz, IMM[2].xxxx, IMM[2].yyyy 86: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[5].xyzz 87: RSQ TEMP[7].x, TEMP[7].xxxx 88: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[7].xxxx 89: MUL TEMP[5].xyz, TEMP[7].yyyy, IN[6].xyzz 90: MAD TEMP[5].xyz, TEMP[7].xxxx, IN[5].xyzz, TEMP[5].xyzz 91: MAD TEMP[5].xyz, TEMP[7].zzzz, IN[7].xyzz, TEMP[5].xyzz 92: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[5].xyzz 93: RSQ TEMP[7].x, TEMP[7].xxxx 94: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[7].xxxx 95: DP3 TEMP[8].x, CONST[22].xyzz, TEMP[7].xyzz 96: MOV_SAT TEMP[8].x, TEMP[8].xxxx 97: MUL TEMP[5].xyz, TEMP[8].xxxx, CONST[20].xyzz 98: MAD TEMP[6].xyz, TEMP[5].xyzz, IN[1].xxxx, TEMP[6].xyzz 99: FSGE TEMP[8].x, -IN[8].wwww, IMM[0].xxxx 100: UIF TEMP[8].xxxx :0 101: MOV TEMP[8].x, IMM[0].xxxx 102: ELSE :0 103: MOV TEMP[8].x, IMM[0].yyyy 104: ENDIF 105: MOV TEMP[3].xyz, TEMP[3].xyzx 106: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[8].xxxx 107: MOV TEMP[8].xyz, -CONST[7].xyzx 108: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz 109: RSQ TEMP[9].x, TEMP[9].xxxx 110: MUL TEMP[5].xyz, TEMP[8].xyzz, TEMP[9].xxxx 111: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[5].xyzz 112: MAD TEMP[9].x, TEMP[8].xxxx, IMM[2].zzzz, IMM[2].zzzz 113: MOV_SAT TEMP[8].x, TEMP[8].xxxx 114: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[9].xxxx 115: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[2].xyzz, TEMP[6].xyzz 116: DP3 TEMP[6].x, IN[2].xyzz, IN[2].xyzz 117: RSQ TEMP[6].x, TEMP[6].xxxx 118: MUL TEMP[6].xyz, IN[2].xyzz, TEMP[6].xxxx 119: DP3 TEMP[9].x, TEMP[6].xyzz, TEMP[7].xyzz 120: ADD TEMP[10].x, TEMP[9].xxxx, TEMP[9].xxxx 121: MOV TEMP[11].x, TEMP[9].xxxx 122: MOV_SAT TEMP[11].x, TEMP[9].xxxx 123: MAD TEMP[9].xyz, TEMP[7].xyzz, -TEMP[10].xxxx, TEMP[6].xyzz 124: DP3 TEMP[12].x, TEMP[5].xyzz, -TEMP[9].xyzz 125: MOV_SAT TEMP[12].x, TEMP[12].xxxx 126: MOV TEMP[2].w, TEMP[12].xxxx 127: ADD TEMP[5].x, TEMP[12].xxxx, IMM[2].wwww 128: MOV TEMP[4].xyz, TEMP[2] 129: FSGE TEMP[13].x, TEMP[5].xxxx, IMM[0].xxxx 130: UIF TEMP[13].xxxx :0 131: MOV TEMP[12].x, TEMP[12].xxxx 132: ELSE :0 133: MOV TEMP[12].x, IMM[3].xxxx 134: ENDIF 135: MOV_SAT TEMP[12].x, TEMP[12].xxxx 136: MOV TEMP[13].xy, IN[0].xyyy 137: TEX TEMP[13], TEMP[13], SAMP[4], 2D 138: MAX TEMP[13], TEMP[13], CONST[27] 139: MUL TEMP[5].x, TEMP[13].wwww, CONST[1].xxxx 140: POW TEMP[12].x, TEMP[12].xxxx, TEMP[5].xxxx 141: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[12].xxxx 142: MUL TEMP[2].xyz, TEMP[4].xyzz, TEMP[8].xxxx 143: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 144: MUL TEMP[5].xyz, TEMP[6].xyzz, TEMP[8].xxxx 145: MAD TEMP[5].xyz, TEMP[10].xxxx, TEMP[7].xyzz, -TEMP[5].xyzz 146: MAX TEMP[7].x, TEMP[7].zzzz, IMM[0].xxxx 147: MOV TEMP[8].xyz, TEMP[5].xyzz 148: TEX TEMP[8].xyz, TEMP[8], SAMP[7], CUBE 149: MUL TEMP[5].xyz, TEMP[8].xyzz, CONST[30].zzzz 150: MUL TEMP[5].xyz, TEMP[5].xyzz, CONST[1].zzzz 151: MAD TEMP[2].xyz, TEMP[2].xyzz, CONST[0].wwww, TEMP[5].xyzz 152: MUL TEMP[2].xyz, TEMP[13].xxxx, TEMP[2].xyzz 153: LRP TEMP[5].xyz, TEMP[13].zzzz, CONST[0].xyzz, TEMP[3].xyzz 154: MUL TEMP[4].xyz, TEMP[13].yyyy, IN[3].xyzz 155: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 156: MOV TEMP[11].y, IMM[2].zzzz 157: MOV TEMP[8].xy, TEMP[11].xyyy 158: TEX TEMP[8].xyz, TEMP[8], SAMP[8], 2D 159: MOV TEMP[11].xy, IN[0].xyyy 160: TEX TEMP[11], TEMP[11], SAMP[3], 2D 161: MAX TEMP[10].xyz, TEMP[11].wxzz, CONST[26].wxzz 162: MAX TEMP[12].x, TEMP[8].zzzz, TEMP[10].zzzz 163: MUL TEMP[6].xyz, TEMP[2].xyzz, TEMP[12].xxxx 164: MAX TEMP[11].x, TEMP[11].yyyy, CONST[3].yyyy 165: MUL TEMP[11].x, TEMP[8].yyyy, TEMP[11].xxxx 166: MOV TEMP[5].y, IN[1].wwww 167: MOV TEMP[5].z, IN[2].wwww 168: MOV TEMP[5].xy, TEMP[5].yzzz 169: TEX TEMP[5].xyz, TEMP[5], SAMP[5], 2D 170: MUL TEMP[5].xyz, TEMP[5].xyzz, CONST[8].xyzz 171: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[2].xxxx, IMM[2].yyyy 172: MUL TEMP[13].x, TEMP[10].yyyy, CONST[3].zzzz 173: MAD TEMP[5].xyz, TEMP[13].xxxx, TEMP[5].xyzz, IMM[0].yyyy 174: MUL TEMP[9].xyz, TEMP[3].xyzz, TEMP[5].xyzz 175: LG2 TEMP[13].x, TEMP[9].xxxx 176: LG2 TEMP[14].x, TEMP[9].yyyy 177: MOV TEMP[13].y, TEMP[14].xxxx 178: LG2 TEMP[14].x, TEMP[9].zzzz 179: MOV TEMP[13].z, TEMP[14].xxxx 180: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[3].yyyy 181: EX2 TEMP[14].x, TEMP[13].xxxx 182: EX2 TEMP[15].x, TEMP[13].yyyy 183: MOV TEMP[14].y, TEMP[15].xxxx 184: EX2 TEMP[13].x, TEMP[13].zzzz 185: MOV TEMP[14].z, TEMP[13].xxxx 186: MOV TEMP[13].xyz, TEMP[14].xyzz 187: TEX TEMP[13].xyz, TEMP[13], SAMP[6], 3D 188: MAD TEMP[3].xyz, TEMP[3].xyzz, -TEMP[5].xyzz, TEMP[13].xyzz 189: MAD TEMP[3].xyz, TEMP[11].xxxx, TEMP[3].xyzz, TEMP[9].xyzz 190: MOV TEMP[5].xy, IN[0].zwww 191: TEX TEMP[5].xyz, TEMP[5], SAMP[2], 2D 192: MAD TEMP[3].xyz, TEMP[5].xyzz, CONST[3].wwww, TEMP[3].xyzz 193: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xyzz, TEMP[6].xyzz 194: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[12].xxxx, -TEMP[0].xyzz 195: MAD TEMP[0].xyz, TEMP[10].zzzz, TEMP[2].xyzz, TEMP[0].xyzz 196: MOV_SAT TEMP[5].x, TEMP[10].xxxx 197: MUL TEMP[2].xyz, TEMP[7].xxxx, TEMP[4].xyzz 198: MAD TEMP[0].xyz, TEMP[2].xyzz, TEMP[8].xxxx, TEMP[0].xyzz 199: LRP TEMP[2].xyz, TEMP[5].xxxx, TEMP[3].xyzz, TEMP[0].xyzz 200: MUL TEMP[0].xyz, TEMP[2].xyzz, CONST[30].xxxx 201: MAD TEMP[3].xyz, TEMP[2].xyzz, -CONST[30].xxxx, CONST[29].xyzz 202: MUL TEMP[2].x, IN[3].wwww, IN[3].wwww 203: MAD TEMP[1].xyz, TEMP[2].xxxx, TEMP[3].xyzz, TEMP[0].xyzz 204: MOV OUT[0], TEMP[1] 205: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 364) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 424) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 428) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 432) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 436) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 440) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 444) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 448) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 452) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 456) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 464) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 468) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 472) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 476) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 480) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 488) %71 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %72 = load <8 x i32> addrspace(2)* %71, !tbaa !0 %73 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %74 = load <4 x i32> addrspace(2)* %73, !tbaa !0 %75 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %76 = load <8 x i32> addrspace(2)* %75, !tbaa !0 %77 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %78 = load <4 x i32> addrspace(2)* %77, !tbaa !0 %79 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %80 = load <8 x i32> addrspace(2)* %79, !tbaa !0 %81 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %82 = load <4 x i32> addrspace(2)* %81, !tbaa !0 %83 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %84 = load <8 x i32> addrspace(2)* %83, !tbaa !0 %85 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %86 = load <4 x i32> addrspace(2)* %85, !tbaa !0 %87 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %88 = load <8 x i32> addrspace(2)* %87, !tbaa !0 %89 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %90 = load <4 x i32> addrspace(2)* %89, !tbaa !0 %91 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %92 = load <8 x i32> addrspace(2)* %91, !tbaa !0 %93 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %94 = load <4 x i32> addrspace(2)* %93, !tbaa !0 %95 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %96 = load <8 x i32> addrspace(2)* %95, !tbaa !0 %97 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %98 = load <4 x i32> addrspace(2)* %97, !tbaa !0 %99 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %100 = load <8 x i32> addrspace(2)* %99, !tbaa !0 %101 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %102 = load <4 x i32> addrspace(2)* %101, !tbaa !0 %103 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %104 = load <8 x i32> addrspace(2)* %103, !tbaa !0 %105 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %106 = load <4 x i32> addrspace(2)* %105, !tbaa !0 %107 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %108 = load <8 x i32> addrspace(2)* %107, !tbaa !0 %109 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %110 = load <4 x i32> addrspace(2)* %109, !tbaa !0 %111 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 10 %112 = load <8 x i32> addrspace(2)* %111, !tbaa !0 %113 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 10 %114 = load <4 x i32> addrspace(2)* %113, !tbaa !0 %115 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %133 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %134 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %135 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %136 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %137 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %138 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %139 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %140 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %141 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7) %142 = call float @llvm.SI.fs.interp(i32 0, i32 8, i32 %5, <2 x i32> %7) %143 = call float @llvm.SI.fs.interp(i32 1, i32 8, i32 %5, <2 x i32> %7) %144 = call float @llvm.SI.fs.interp(i32 2, i32 8, i32 %5, <2 x i32> %7) %145 = call float @llvm.SI.fs.interp(i32 3, i32 8, i32 %5, <2 x i32> %7) %146 = call float @fabs(float %34) %147 = fmul float %68, %132 %148 = bitcast float %115 to i32 %149 = bitcast float %116 to i32 %150 = insertelement <2 x i32> undef, i32 %148, i32 0 %151 = insertelement <2 x i32> %150, i32 %149, i32 1 %152 = bitcast <8 x i32> %72 to <32 x i8> %153 = bitcast <4 x i32> %74 to <16 x i8> %154 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %151, <32 x i8> %152, <16 x i8> %153, i32 2) %155 = extractelement <4 x float> %154, i32 0 %156 = extractelement <4 x float> %154, i32 1 %157 = extractelement <4 x float> %154, i32 2 %158 = extractelement <4 x float> %154, i32 3 %159 = fmul float %158, %30 %160 = fsub float -0.000000e+00, %146 %161 = fcmp oge float %160, 0.000000e+00 %162 = sext i1 %161 to i32 %163 = bitcast i32 %162 to float %164 = bitcast float %163 to i32 %165 = icmp ne i32 %164, 0 %. = select i1 %165, float %159, float %147 %166 = fmul float 1.000000e+00, %142 %167 = fmul float 1.000000e+00, %143 %168 = fmul float 1.000000e+00, %144 %169 = fadd float %166, 9.765625e-04 %170 = fadd float %167, 9.765625e-04 %171 = fadd float %168, 0.000000e+00 %172 = bitcast float %171 to i32 %173 = bitcast float %169 to i32 %174 = bitcast float %170 to i32 %175 = insertelement <4 x i32> undef, i32 %172, i32 0 %176 = insertelement <4 x i32> %175, i32 %173, i32 1 %177 = insertelement <4 x i32> %176, i32 %174, i32 2 %178 = insertelement <4 x i32> %177, i32 undef, i32 3 %179 = bitcast <8 x i32> %112 to <32 x i8> %180 = bitcast <4 x i32> %114 to <16 x i8> %181 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %178, <32 x i8> %179, <16 x i8> %180, i32 7) %182 = extractelement <4 x float> %181, i32 0 %183 = fadd float %166, -9.765625e-04 %184 = fadd float %167, 9.765625e-04 %185 = fadd float %168, 0.000000e+00 %186 = bitcast float %185 to i32 %187 = bitcast float %183 to i32 %188 = bitcast float %184 to i32 %189 = insertelement <4 x i32> undef, i32 %186, i32 0 %190 = insertelement <4 x i32> %189, i32 %187, i32 1 %191 = insertelement <4 x i32> %190, i32 %188, i32 2 %192 = insertelement <4 x i32> %191, i32 undef, i32 3 %193 = bitcast <8 x i32> %112 to <32 x i8> %194 = bitcast <4 x i32> %114 to <16 x i8> %195 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %192, <32 x i8> %193, <16 x i8> %194, i32 7) %196 = extractelement <4 x float> %195, i32 0 %197 = fadd float %166, 9.765625e-04 %198 = fadd float %167, -9.765625e-04 %199 = fadd float %168, 0.000000e+00 %200 = bitcast float %199 to i32 %201 = bitcast float %197 to i32 %202 = bitcast float %198 to i32 %203 = insertelement <4 x i32> undef, i32 %200, i32 0 %204 = insertelement <4 x i32> %203, i32 %201, i32 1 %205 = insertelement <4 x i32> %204, i32 %202, i32 2 %206 = insertelement <4 x i32> %205, i32 undef, i32 3 %207 = bitcast <8 x i32> %112 to <32 x i8> %208 = bitcast <4 x i32> %114 to <16 x i8> %209 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %206, <32 x i8> %207, <16 x i8> %208, i32 7) %210 = extractelement <4 x float> %209, i32 0 %211 = fadd float %166, -9.765625e-04 %212 = fadd float %167, -9.765625e-04 %213 = fadd float %168, 0.000000e+00 %214 = bitcast float %213 to i32 %215 = bitcast float %211 to i32 %216 = bitcast float %212 to i32 %217 = insertelement <4 x i32> undef, i32 %214, i32 0 %218 = insertelement <4 x i32> %217, i32 %215, i32 1 %219 = insertelement <4 x i32> %218, i32 %216, i32 2 %220 = insertelement <4 x i32> %219, i32 undef, i32 3 %221 = bitcast <8 x i32> %112 to <32 x i8> %222 = bitcast <4 x i32> %114 to <16 x i8> %223 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %220, <32 x i8> %221, <16 x i8> %222, i32 7) %224 = extractelement <4 x float> %223, i32 0 %225 = fmul float %182, 0x3FB32D1040000000 %226 = fmul float %196, 0x3FB32D1040000000 %227 = fadd float %225, %226 %228 = fmul float %210, 0x3FB32D1040000000 %229 = fadd float %227, %228 %230 = fmul float %224, 0x3FB32D1040000000 %231 = fadd float %229, %230 %232 = fadd float %166, 9.765625e-04 %233 = fadd float %167, 0.000000e+00 %234 = fadd float %168, 0.000000e+00 %235 = bitcast float %234 to i32 %236 = bitcast float %232 to i32 %237 = bitcast float %233 to i32 %238 = insertelement <4 x i32> undef, i32 %235, i32 0 %239 = insertelement <4 x i32> %238, i32 %236, i32 1 %240 = insertelement <4 x i32> %239, i32 %237, i32 2 %241 = insertelement <4 x i32> %240, i32 undef, i32 3 %242 = bitcast <8 x i32> %112 to <32 x i8> %243 = bitcast <4 x i32> %114 to <16 x i8> %244 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %241, <32 x i8> %242, <16 x i8> %243, i32 7) %245 = extractelement <4 x float> %244, i32 0 %246 = fadd float %166, -9.765625e-04 %247 = fadd float %167, 0.000000e+00 %248 = fadd float %168, 0.000000e+00 %249 = bitcast float %248 to i32 %250 = bitcast float %246 to i32 %251 = bitcast float %247 to i32 %252 = insertelement <4 x i32> undef, i32 %249, i32 0 %253 = insertelement <4 x i32> %252, i32 %250, i32 1 %254 = insertelement <4 x i32> %253, i32 %251, i32 2 %255 = insertelement <4 x i32> %254, i32 undef, i32 3 %256 = bitcast <8 x i32> %112 to <32 x i8> %257 = bitcast <4 x i32> %114 to <16 x i8> %258 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %255, <32 x i8> %256, <16 x i8> %257, i32 7) %259 = extractelement <4 x float> %258, i32 0 %260 = fadd float %166, 0.000000e+00 %261 = fadd float %167, -9.765625e-04 %262 = fadd float %168, 0.000000e+00 %263 = bitcast float %262 to i32 %264 = bitcast float %260 to i32 %265 = bitcast float %261 to i32 %266 = insertelement <4 x i32> undef, i32 %263, i32 0 %267 = insertelement <4 x i32> %266, i32 %264, i32 1 %268 = insertelement <4 x i32> %267, i32 %265, i32 2 %269 = insertelement <4 x i32> %268, i32 undef, i32 3 %270 = bitcast <8 x i32> %112 to <32 x i8> %271 = bitcast <4 x i32> %114 to <16 x i8> %272 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %269, <32 x i8> %270, <16 x i8> %271, i32 7) %273 = extractelement <4 x float> %272, i32 0 %274 = fadd float %166, 0.000000e+00 %275 = fadd float %167, 9.765625e-04 %276 = fadd float %168, 0.000000e+00 %277 = fadd float %166, 0.000000e+00 %278 = fadd float %167, 0.000000e+00 %279 = fadd float %168, 0.000000e+00 %280 = bitcast float %276 to i32 %281 = bitcast float %274 to i32 %282 = bitcast float %275 to i32 %283 = insertelement <4 x i32> undef, i32 %280, i32 0 %284 = insertelement <4 x i32> %283, i32 %281, i32 1 %285 = insertelement <4 x i32> %284, i32 %282, i32 2 %286 = insertelement <4 x i32> %285, i32 undef, i32 3 %287 = bitcast <8 x i32> %112 to <32 x i8> %288 = bitcast <4 x i32> %114 to <16 x i8> %289 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %286, <32 x i8> %287, <16 x i8> %288, i32 7) %290 = extractelement <4 x float> %289, i32 0 %291 = fmul float %245, 0x3FBFA3F480000000 %292 = fmul float %259, 0x3FBFA3F480000000 %293 = fadd float %291, %292 %294 = fmul float %273, 0x3FBFA3F480000000 %295 = fadd float %293, %294 %296 = fmul float %290, 0x3FBFA3F480000000 %297 = fadd float %295, %296 %298 = fadd float %231, %297 %299 = bitcast float %279 to i32 %300 = bitcast float %277 to i32 %301 = bitcast float %278 to i32 %302 = insertelement <4 x i32> undef, i32 %299, i32 0 %303 = insertelement <4 x i32> %302, i32 %300, i32 1 %304 = insertelement <4 x i32> %303, i32 %301, i32 2 %305 = insertelement <4 x i32> %304, i32 undef, i32 3 %306 = bitcast <8 x i32> %112 to <32 x i8> %307 = bitcast <4 x i32> %114 to <16 x i8> %308 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %305, <32 x i8> %306, <16 x i8> %307, i32 7) %309 = extractelement <4 x float> %308, i32 0 %310 = fmul float %309, 0x3FCA5DF660000000 %311 = fadd float %310, %298 %312 = fadd float -2.000000e+04, %131 %313 = fdiv float 1.000000e+00, %41 %314 = fsub float -0.000000e+00, %312 %315 = fmul float %314, %313 %316 = fmul float %39, %315 %317 = fadd float %316, %129 %318 = fmul float %40, %315 %319 = fadd float %318, %130 %320 = fdiv float 1.000000e+00, %54 %321 = fmul float %317, %320 %322 = fadd float %321, %35 %323 = fmul float %319, %320 %324 = fadd float %323, %36 %325 = fmul float %317, %320 %326 = fadd float %325, %37 %327 = fmul float %319, %320 %328 = fadd float %327, %38 %329 = bitcast float %322 to i32 %330 = bitcast float %324 to i32 %331 = insertelement <2 x i32> undef, i32 %329, i32 0 %332 = insertelement <2 x i32> %331, i32 %330, i32 1 %333 = bitcast <8 x i32> %108 to <32 x i8> %334 = bitcast <4 x i32> %110 to <16 x i8> %335 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %332, <32 x i8> %333, <16 x i8> %334, i32 2) %336 = extractelement <4 x float> %335, i32 0 %337 = bitcast float %326 to i32 %338 = bitcast float %328 to i32 %339 = insertelement <2 x i32> undef, i32 %337, i32 0 %340 = insertelement <2 x i32> %339, i32 %338, i32 1 %341 = bitcast <8 x i32> %108 to <32 x i8> %342 = bitcast <4 x i32> %110 to <16 x i8> %343 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %340, <32 x i8> %341, <16 x i8> %342, i32 2) %344 = extractelement <4 x float> %343, i32 1 %345 = fmul float %336, %344 %346 = fsub float -0.000000e+00, %344 %347 = fmul float %336, %346 %348 = fadd float %347, 1.000000e+00 %349 = fmul float %345, %62 %350 = fmul float %345, %63 %351 = fmul float %345, %64 %352 = fmul float %311, %349 %353 = fmul float %311, %350 %354 = fmul float %311, %351 %355 = fsub float -0.000000e+00, %311 %356 = fadd float %355, 1.000000e+00 %357 = call float @llvm.maxnum.f32(float %356, float %348) %358 = fmul float %357, %48 %359 = fmul float %357, %49 %360 = fmul float %357, %50 %361 = fmul float %358, %119 %362 = fmul float %359, %119 %363 = fmul float %360, %119 %364 = bitcast float %115 to i32 %365 = bitcast float %116 to i32 %366 = insertelement <2 x i32> undef, i32 %364, i32 0 %367 = insertelement <2 x i32> %366, i32 %365, i32 1 %368 = bitcast <8 x i32> %76 to <32 x i8> %369 = bitcast <4 x i32> %78 to <16 x i8> %370 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %367, <32 x i8> %368, <16 x i8> %369, i32 2) %371 = extractelement <4 x float> %370, i32 0 %372 = extractelement <4 x float> %370, i32 1 %373 = extractelement <4 x float> %370, i32 2 %374 = fmul float %371, 2.000000e+00 %375 = fadd float %374, -1.000000e+00 %376 = fmul float %372, 2.000000e+00 %377 = fadd float %376, -1.000000e+00 %378 = fmul float %373, 2.000000e+00 %379 = fadd float %378, -1.000000e+00 %380 = fmul float %375, %375 %381 = fmul float %377, %377 %382 = fadd float %381, %380 %383 = fmul float %379, %379 %384 = fadd float %382, %383 %385 = call float @llvm.AMDGPU.rsq.clamped.f32(float %384) %386 = fmul float %375, %385 %387 = fmul float %377, %385 %388 = fmul float %379, %385 %389 = fmul float %387, %136 %390 = fmul float %387, %137 %391 = fmul float %387, %138 %392 = fmul float %386, %133 %393 = fadd float %392, %389 %394 = fmul float %386, %134 %395 = fadd float %394, %390 %396 = fmul float %386, %135 %397 = fadd float %396, %391 %398 = fmul float %388, %139 %399 = fadd float %398, %393 %400 = fmul float %388, %140 %401 = fadd float %400, %395 %402 = fmul float %388, %141 %403 = fadd float %402, %397 %404 = fmul float %399, %399 %405 = fmul float %401, %401 %406 = fadd float %405, %404 %407 = fmul float %403, %403 %408 = fadd float %406, %407 %409 = call float @llvm.AMDGPU.rsq.clamped.f32(float %408) %410 = fmul float %399, %409 %411 = fmul float %401, %409 %412 = fmul float %403, %409 %413 = fmul float %51, %410 %414 = fmul float %52, %411 %415 = fadd float %414, %413 %416 = fmul float %53, %412 %417 = fadd float %415, %416 %418 = call float @llvm.AMDIL.clamp.(float %417, float 0.000000e+00, float 1.000000e+00) %419 = fmul float %418, %45 %420 = fmul float %418, %46 %421 = fmul float %418, %47 %422 = fmul float %419, %119 %423 = fadd float %422, %361 %424 = fmul float %420, %119 %425 = fadd float %424, %362 %426 = fmul float %421, %119 %427 = fadd float %426, %363 %428 = fsub float -0.000000e+00, %145 %429 = fcmp oge float %428, 0.000000e+00 %430 = sext i1 %429 to i32 %431 = bitcast i32 %430 to float %432 = bitcast float %431 to i32 %433 = icmp ne i32 %432, 0 %temp32.0 = select i1 %433, float 0.000000e+00, float 1.000000e+00 %434 = fmul float %311, %temp32.0 %435 = fsub float -0.000000e+00, %39 %436 = fsub float -0.000000e+00, %40 %437 = fsub float -0.000000e+00, %41 %438 = fmul float %435, %435 %439 = fmul float %436, %436 %440 = fadd float %439, %438 %441 = fmul float %437, %437 %442 = fadd float %440, %441 %443 = call float @llvm.AMDGPU.rsq.clamped.f32(float %442) %444 = fmul float %435, %443 %445 = fmul float %436, %443 %446 = fmul float %437, %443 %447 = fmul float %410, %444 %448 = fmul float %411, %445 %449 = fadd float %448, %447 %450 = fmul float %412, %446 %451 = fadd float %449, %450 %452 = fmul float %451, 5.000000e-01 %453 = fadd float %452, 5.000000e-01 %454 = call float @llvm.AMDIL.clamp.(float %451, float 0.000000e+00, float 1.000000e+00) %455 = fmul float %434, %453 %456 = fmul float %455, %352 %457 = fadd float %456, %423 %458 = fmul float %455, %353 %459 = fadd float %458, %425 %460 = fmul float %455, %354 %461 = fadd float %460, %427 %462 = fmul float %121, %121 %463 = fmul float %122, %122 %464 = fadd float %463, %462 %465 = fmul float %123, %123 %466 = fadd float %464, %465 %467 = call float @llvm.AMDGPU.rsq.clamped.f32(float %466) %468 = fmul float %121, %467 %469 = fmul float %122, %467 %470 = fmul float %123, %467 %471 = fmul float %468, %410 %472 = fmul float %469, %411 %473 = fadd float %472, %471 %474 = fmul float %470, %412 %475 = fadd float %473, %474 %476 = fadd float %475, %475 %477 = call float @llvm.AMDIL.clamp.(float %475, float 0.000000e+00, float 1.000000e+00) %478 = fsub float -0.000000e+00, %476 %479 = fmul float %410, %478 %480 = fadd float %479, %468 %481 = fsub float -0.000000e+00, %476 %482 = fmul float %411, %481 %483 = fadd float %482, %469 %484 = fsub float -0.000000e+00, %476 %485 = fmul float %412, %484 %486 = fadd float %485, %470 %487 = fsub float -0.000000e+00, %480 %488 = fsub float -0.000000e+00, %483 %489 = fsub float -0.000000e+00, %486 %490 = fmul float %444, %487 %491 = fmul float %445, %488 %492 = fadd float %491, %490 %493 = fmul float %446, %489 %494 = fadd float %492, %493 %495 = call float @llvm.AMDIL.clamp.(float %494, float 0.000000e+00, float 1.000000e+00) %496 = fadd float %495, 0xBF847AE140000000 %497 = fcmp oge float %496, 0.000000e+00 %498 = sext i1 %497 to i32 %499 = bitcast i32 %498 to float %500 = bitcast float %499 to i32 %501 = icmp ne i32 %500, 0 %.70 = select i1 %501, float %495, float 0x3F847AE140000000 %502 = call float @llvm.AMDIL.clamp.(float %.70, float 0.000000e+00, float 1.000000e+00) %503 = bitcast float %115 to i32 %504 = bitcast float %116 to i32 %505 = insertelement <2 x i32> undef, i32 %503, i32 0 %506 = insertelement <2 x i32> %505, i32 %504, i32 1 %507 = bitcast <8 x i32> %88 to <32 x i8> %508 = bitcast <4 x i32> %90 to <16 x i8> %509 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %506, <32 x i8> %507, <16 x i8> %508, i32 2) %510 = extractelement <4 x float> %509, i32 0 %511 = extractelement <4 x float> %509, i32 1 %512 = extractelement <4 x float> %509, i32 2 %513 = extractelement <4 x float> %509, i32 3 %514 = call float @llvm.maxnum.f32(float %510, float %58) %515 = call float @llvm.maxnum.f32(float %511, float %59) %516 = call float @llvm.maxnum.f32(float %512, float %60) %517 = call float @llvm.maxnum.f32(float %513, float %61) %518 = fmul float %517, %28 %519 = call float @llvm.pow.f32(float %502, float %518) %520 = fmul float %454, %519 %521 = fmul float %352, %520 %522 = fmul float %353, %520 %523 = fmul float %354, %520 %524 = fmul float %410, %410 %525 = fmul float %411, %411 %526 = fadd float %525, %524 %527 = fmul float %412, %412 %528 = fadd float %526, %527 %529 = fmul float %468, %528 %530 = fmul float %469, %528 %531 = fmul float %470, %528 %532 = fsub float -0.000000e+00, %529 %533 = fmul float %476, %410 %534 = fadd float %533, %532 %535 = fsub float -0.000000e+00, %530 %536 = fmul float %476, %411 %537 = fadd float %536, %535 %538 = fsub float -0.000000e+00, %531 %539 = fmul float %476, %412 %540 = fadd float %539, %538 %541 = call float @llvm.maxnum.f32(float %412, float 0.000000e+00) %542 = insertelement <4 x float> undef, float %534, i32 0 %543 = insertelement <4 x float> %542, float %537, i32 1 %544 = insertelement <4 x float> %543, float %540, i32 2 %545 = insertelement <4 x float> %544, float 0.000000e+00, i32 3 %546 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %545) %547 = extractelement <4 x float> %546, i32 0 %548 = extractelement <4 x float> %546, i32 1 %549 = extractelement <4 x float> %546, i32 2 %550 = extractelement <4 x float> %546, i32 3 %551 = call float @fabs(float %549) %552 = fdiv float 1.000000e+00, %551 %553 = fmul float %547, %552 %554 = fadd float %553, 1.500000e+00 %555 = fmul float %548, %552 %556 = fadd float %555, 1.500000e+00 %557 = bitcast float %556 to i32 %558 = bitcast float %554 to i32 %559 = bitcast float %550 to i32 %560 = insertelement <4 x i32> undef, i32 %557, i32 0 %561 = insertelement <4 x i32> %560, i32 %558, i32 1 %562 = insertelement <4 x i32> %561, i32 %559, i32 2 %563 = insertelement <4 x i32> %562, i32 undef, i32 3 %564 = bitcast <8 x i32> %100 to <32 x i8> %565 = bitcast <4 x i32> %102 to <16 x i8> %566 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %563, <32 x i8> %564, <16 x i8> %565, i32 4) %567 = extractelement <4 x float> %566, i32 0 %568 = extractelement <4 x float> %566, i32 1 %569 = extractelement <4 x float> %566, i32 2 %570 = fmul float %567, %70 %571 = fmul float %568, %70 %572 = fmul float %569, %70 %573 = fmul float %570, %29 %574 = fmul float %571, %29 %575 = fmul float %572, %29 %576 = fmul float %521, %27 %577 = fadd float %576, %573 %578 = fmul float %522, %27 %579 = fadd float %578, %574 %580 = fmul float %523, %27 %581 = fadd float %580, %575 %582 = fmul float %514, %577 %583 = fmul float %514, %579 %584 = fmul float %514, %581 %585 = call float @llvm.AMDGPU.lrp(float %516, float %24, float %155) %586 = call float @llvm.AMDGPU.lrp(float %516, float %25, float %156) %587 = call float @llvm.AMDGPU.lrp(float %516, float %26, float %157) %588 = fmul float %515, %125 %589 = fmul float %515, %126 %590 = fmul float %515, %127 %591 = fmul float %582, %585 %592 = fmul float %583, %586 %593 = fmul float %584, %587 %594 = bitcast float %477 to i32 %595 = bitcast float 5.000000e-01 to i32 %596 = insertelement <2 x i32> undef, i32 %594, i32 0 %597 = insertelement <2 x i32> %596, i32 %595, i32 1 %598 = bitcast <8 x i32> %104 to <32 x i8> %599 = bitcast <4 x i32> %106 to <16 x i8> %600 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %597, <32 x i8> %598, <16 x i8> %599, i32 2) %601 = extractelement <4 x float> %600, i32 0 %602 = extractelement <4 x float> %600, i32 1 %603 = extractelement <4 x float> %600, i32 2 %604 = bitcast float %115 to i32 %605 = bitcast float %116 to i32 %606 = insertelement <2 x i32> undef, i32 %604, i32 0 %607 = insertelement <2 x i32> %606, i32 %605, i32 1 %608 = bitcast <8 x i32> %84 to <32 x i8> %609 = bitcast <4 x i32> %86 to <16 x i8> %610 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %607, <32 x i8> %608, <16 x i8> %609, i32 2) %611 = extractelement <4 x float> %610, i32 0 %612 = extractelement <4 x float> %610, i32 1 %613 = extractelement <4 x float> %610, i32 2 %614 = extractelement <4 x float> %610, i32 3 %615 = call float @llvm.maxnum.f32(float %614, float %57) %616 = call float @llvm.maxnum.f32(float %611, float %55) %617 = call float @llvm.maxnum.f32(float %613, float %56) %618 = call float @llvm.maxnum.f32(float %603, float %617) %619 = fmul float %591, %618 %620 = fmul float %592, %618 %621 = fmul float %593, %618 %622 = call float @llvm.maxnum.f32(float %612, float %31) %623 = fmul float %602, %622 %624 = bitcast float %120 to i32 %625 = bitcast float %124 to i32 %626 = insertelement <2 x i32> undef, i32 %624, i32 0 %627 = insertelement <2 x i32> %626, i32 %625, i32 1 %628 = bitcast <8 x i32> %92 to <32 x i8> %629 = bitcast <4 x i32> %94 to <16 x i8> %630 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %627, <32 x i8> %628, <16 x i8> %629, i32 2) %631 = extractelement <4 x float> %630, i32 0 %632 = extractelement <4 x float> %630, i32 1 %633 = extractelement <4 x float> %630, i32 2 %634 = fmul float %631, %42 %635 = fmul float %632, %43 %636 = fmul float %633, %44 %637 = fmul float %634, 2.000000e+00 %638 = fadd float %637, -1.000000e+00 %639 = fmul float %635, 2.000000e+00 %640 = fadd float %639, -1.000000e+00 %641 = fmul float %636, 2.000000e+00 %642 = fadd float %641, -1.000000e+00 %643 = fmul float %616, %32 %644 = fmul float %643, %638 %645 = fadd float %644, 1.000000e+00 %646 = fmul float %643, %640 %647 = fadd float %646, 1.000000e+00 %648 = fmul float %643, %642 %649 = fadd float %648, 1.000000e+00 %650 = fmul float %155, %645 %651 = fmul float %156, %647 %652 = fmul float %157, %649 %653 = call float @llvm.log2.f32(float %650) %654 = call float @llvm.log2.f32(float %651) %655 = call float @llvm.log2.f32(float %652) %656 = fmul float %653, 0x3FDD168720000000 %657 = fmul float %654, 0x3FDD168720000000 %658 = fmul float %655, 0x3FDD168720000000 %659 = call float @llvm.AMDIL.exp.(float %656) %660 = call float @llvm.AMDIL.exp.(float %657) %661 = call float @llvm.AMDIL.exp.(float %658) %662 = bitcast float %659 to i32 %663 = bitcast float %660 to i32 %664 = bitcast float %661 to i32 %665 = insertelement <4 x i32> undef, i32 %662, i32 0 %666 = insertelement <4 x i32> %665, i32 %663, i32 1 %667 = insertelement <4 x i32> %666, i32 %664, i32 2 %668 = insertelement <4 x i32> %667, i32 undef, i32 3 %669 = bitcast <8 x i32> %96 to <32 x i8> %670 = bitcast <4 x i32> %98 to <16 x i8> %671 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %668, <32 x i8> %669, <16 x i8> %670, i32 3) %672 = extractelement <4 x float> %671, i32 0 %673 = extractelement <4 x float> %671, i32 1 %674 = extractelement <4 x float> %671, i32 2 %675 = fsub float -0.000000e+00, %645 %676 = fmul float %155, %675 %677 = fadd float %676, %672 %678 = fsub float -0.000000e+00, %647 %679 = fmul float %156, %678 %680 = fadd float %679, %673 %681 = fsub float -0.000000e+00, %649 %682 = fmul float %157, %681 %683 = fadd float %682, %674 %684 = fmul float %623, %677 %685 = fadd float %684, %650 %686 = fmul float %623, %680 %687 = fadd float %686, %651 %688 = fmul float %623, %683 %689 = fadd float %688, %652 %690 = bitcast float %117 to i32 %691 = bitcast float %118 to i32 %692 = insertelement <2 x i32> undef, i32 %690, i32 0 %693 = insertelement <2 x i32> %692, i32 %691, i32 1 %694 = bitcast <8 x i32> %80 to <32 x i8> %695 = bitcast <4 x i32> %82 to <16 x i8> %696 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %693, <32 x i8> %694, <16 x i8> %695, i32 2) %697 = extractelement <4 x float> %696, i32 0 %698 = extractelement <4 x float> %696, i32 1 %699 = extractelement <4 x float> %696, i32 2 %700 = fmul float %697, %33 %701 = fadd float %700, %685 %702 = fmul float %698, %33 %703 = fadd float %702, %687 %704 = fmul float %699, %33 %705 = fadd float %704, %689 %706 = fmul float %457, %701 %707 = fadd float %706, %619 %708 = fmul float %459, %703 %709 = fadd float %708, %620 %710 = fmul float %461, %705 %711 = fadd float %710, %621 %712 = fsub float -0.000000e+00, %707 %713 = fmul float %591, %618 %714 = fadd float %713, %712 %715 = fsub float -0.000000e+00, %709 %716 = fmul float %592, %618 %717 = fadd float %716, %715 %718 = fsub float -0.000000e+00, %711 %719 = fmul float %593, %618 %720 = fadd float %719, %718 %721 = fmul float %617, %714 %722 = fadd float %721, %707 %723 = fmul float %617, %717 %724 = fadd float %723, %709 %725 = fmul float %617, %720 %726 = fadd float %725, %711 %727 = call float @llvm.AMDIL.clamp.(float %615, float 0.000000e+00, float 1.000000e+00) %728 = fmul float %541, %588 %729 = fmul float %541, %589 %730 = fmul float %541, %590 %731 = fmul float %728, %601 %732 = fadd float %731, %722 %733 = fmul float %729, %601 %734 = fadd float %733, %724 %735 = fmul float %730, %601 %736 = fadd float %735, %726 %737 = call float @llvm.AMDGPU.lrp(float %727, float %701, float %732) %738 = call float @llvm.AMDGPU.lrp(float %727, float %703, float %734) %739 = call float @llvm.AMDGPU.lrp(float %727, float %705, float %736) %740 = fmul float %737, %69 %741 = fmul float %738, %69 %742 = fmul float %739, %69 %743 = fsub float -0.000000e+00, %69 %744 = fmul float %737, %743 %745 = fadd float %744, %65 %746 = fsub float -0.000000e+00, %69 %747 = fmul float %738, %746 %748 = fadd float %747, %66 %749 = fsub float -0.000000e+00, %69 %750 = fmul float %739, %749 %751 = fadd float %750, %67 %752 = fmul float %128, %128 %753 = fmul float %752, %745 %754 = fadd float %753, %740 %755 = fmul float %752, %748 %756 = fadd float %755, %741 %757 = fmul float %752, %751 %758 = fadd float %757, %742 %759 = fcmp uge float %., %4 %760 = sext i1 %759 to i32 %761 = trunc i32 %760 to i1 %762 = select i1 %761, float 1.000000e+00, float -1.000000e+00 call void @llvm.AMDGPU.kill(float %762) %763 = call i32 @llvm.SI.packf16(float %754, float %756) %764 = bitcast i32 %763 to float %765 = call i32 @llvm.SI.packf16(float %758, float %.) %766 = bitcast i32 %765 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %764, float %766, float %764, float %766) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #4 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #3 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #4 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } attributes #4 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 s16, s8 ; BE900308 s_mov_b64 vcc, s[6:7] ; BEEA0406 s_mov_b64 s[100:101], s[2:3] ; BEE40402 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v8, v0, 1, 0, [m0] ; C8200100 v_interp_p2_f32 v8, [v8], v1, 1, 0, [m0] ; C8210101 v_interp_p1_f32 v7, v0, 0, 0, [m0] ; C81C0000 v_interp_p2_f32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 s_load_dwordx4 s[96:99], s[4:5], 0x0 ; C0B00500 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v51, s0, 0 ; 04670000 v_writelane_b32 v51, s1, 1 ; 04670201 v_writelane_b32 v51, s2, 2 ; 04670402 v_writelane_b32 v51, s3, 3 ; 04670603 s_load_dwordx4 s[48:51], s[4:5], 0xc ; C098050C s_load_dwordx4 s[72:75], s[4:5], 0x10 ; C0A40510 s_load_dwordx4 s[36:39], s[4:5], 0x14 ; C0920514 s_load_dwordx4 s[24:27], s[4:5], 0x18 ; C08C0518 s_load_dwordx4 s[84:87], s[4:5], 0x1c ; C0AA051C s_load_dwordx4 s[52:55], s[4:5], 0x20 ; C09A0520 s_load_dwordx4 s[64:67], s[4:5], 0x24 ; C0A00524 s_load_dwordx4 s[68:71], s[4:5], 0x28 ; C0A20528 s_load_dwordx8 s[0:7], vcc, 0x0 ; C0C06B00 s_load_dwordx8 s[8:15], vcc, 0x8 ; C0C46B08 s_load_dwordx8 s[28:35], vcc, 0x10 ; C0CE6B10 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v51, s28, 4 ; 0467081C v_writelane_b32 v51, s29, 5 ; 04670A1D v_writelane_b32 v51, s30, 6 ; 04670C1E v_writelane_b32 v51, s31, 7 ; 04670E1F v_writelane_b32 v51, s32, 8 ; 04671020 v_writelane_b32 v51, s33, 9 ; 04671221 v_writelane_b32 v51, s34, 10 ; 04671422 v_writelane_b32 v51, s35, 11 ; 04671623 s_load_dwordx8 s[56:63], vcc, 0x18 ; C0DC6B18 s_load_dwordx8 s[76:83], vcc, 0x20 ; C0E66B20 s_load_dwordx8 s[40:47], vcc, 0x28 ; C0D46B28 s_load_dwordx8 s[28:35], vcc, 0x30 ; C0CE6B30 s_load_dwordx8 s[88:95], vcc, 0x38 ; C0EC6B38 image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[0:7], s[96:99] ; F0800F00 03000307 s_load_dwordx4 s[0:3], s[100:101], 0x0 ; C0806500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v6 ; 10040C04 v_interp_p1_f32 v9, v0, 3, 4, [m0] ; C8241300 v_interp_p2_f32 v9, [v9], v1, 3, 4, [m0] ; C8251301 s_buffer_load_dword s4, s[0:3], 0x77 ; C2020177 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s4, v9 ; 10121204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ge_f32_e64 s[4:5], -|s4|, 0 ; D00C0104 20010004 v_cndmask_b32_e64 v14, 0, -1, s[4:5] ; D200000E 00118280 v_cmp_ne_i32_e64 s[4:5], v14, 0 ; D10A0004 0001010E v_cndmask_b32_e64 v2, v9, v2, s[4:5] ; D2000002 08120509 v_cmp_nlt_f32_e64 s[4:5], v2, s16 ; D01C0004 00002102 v_cndmask_b32_e64 v9, -1.0, 1.0, s[4:5] ; D2000809 1811E4F3 v_cmpx_le_f32_e32 vcc, 0, v9 ; 7C261280 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[8:15], s[20:23] ; F0800700 00A20E07 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, 2.0, v15, -1.0 ; D2820009 03CE1EF4 v_mad_f32 v17, 2.0, v14, -1.0 ; D2820011 03CE1CF4 v_mul_f32_e32 v18, v17, v17 ; 10242311 v_mad_f32 v18, v9, v9, v18 ; D2820012 044A1309 v_mad_f32 v14, 2.0, v16, -1.0 ; D282000E 03CE20F4 v_mad_f32 v15, v14, v14, v18 ; D282000F 044A1D0E v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v16, v15, v17 ; 1020230F v_mul_f32_e32 v9, v15, v9 ; 1012130F v_interp_p1_f32 v17, v0, 1, 6, [m0] ; C8441900 v_interp_p2_f32 v17, [v17], v1, 1, 6, [m0] ; C8451901 v_mul_f32_e32 v17, v17, v9 ; 10221311 v_interp_p1_f32 v18, v0, 1, 5, [m0] ; C8481500 v_interp_p2_f32 v18, [v18], v1, 1, 5, [m0] ; C8491501 v_mad_f32 v17, v16, v18, v17 ; D2820011 04462510 v_mul_f32_e32 v14, v15, v14 ; 101C1D0F v_interp_p1_f32 v15, v0, 1, 7, [m0] ; C83C1D00 v_interp_p2_f32 v15, [v15], v1, 1, 7, [m0] ; C83D1D01 v_mad_f32 v15, v14, v15, v17 ; D282000F 04461F0E v_interp_p1_f32 v17, v0, 0, 6, [m0] ; C8441800 v_interp_p2_f32 v17, [v17], v1, 0, 6, [m0] ; C8451801 v_mul_f32_e32 v17, v17, v9 ; 10221311 v_interp_p1_f32 v18, v0, 0, 5, [m0] ; C8481400 v_interp_p2_f32 v18, [v18], v1, 0, 5, [m0] ; C8491401 v_mad_f32 v17, v16, v18, v17 ; D2820011 04462510 v_interp_p1_f32 v18, v0, 0, 7, [m0] ; C8481C00 v_interp_p2_f32 v18, [v18], v1, 0, 7, [m0] ; C8491C01 v_mad_f32 v17, v14, v18, v17 ; D2820011 0446250E v_mul_f32_e32 v18, v17, v17 ; 10242311 v_mad_f32 v18, v15, v15, v18 ; D2820012 044A1F0F v_interp_p1_f32 v19, v0, 2, 6, [m0] ; C84C1A00 v_interp_p2_f32 v19, [v19], v1, 2, 6, [m0] ; C84D1A01 v_mul_f32_e32 v9, v19, v9 ; 10121313 v_interp_p1_f32 v19, v0, 2, 5, [m0] ; C84C1600 v_interp_p2_f32 v19, [v19], v1, 2, 5, [m0] ; C84D1601 v_mad_f32 v9, v16, v19, v9 ; D2820009 04262710 v_interp_p1_f32 v16, v0, 2, 7, [m0] ; C8401E00 v_interp_p2_f32 v16, [v16], v1, 2, 7, [m0] ; C8411E01 v_mad_f32 v9, v14, v16, v9 ; D2820009 0426210E v_mad_f32 v14, v9, v9, v18 ; D282000E 044A1309 v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E v_mul_f32_e32 v15, v14, v15 ; 101E1F0E v_mul_f32_e32 v16, v14, v17 ; 1020230E v_mul_f32_e32 v17, v16, v16 ; 10222110 v_mad_f32 v17, v15, v15, v17 ; D2820011 04461F0F v_mul_f32_e32 v9, v14, v9 ; 1012130E v_mad_f32 v14, v9, v9, v17 ; D282000E 04461309 v_interp_p1_f32 v17, v0, 1, 2, [m0] ; C8440900 v_interp_p2_f32 v17, [v17], v1, 1, 2, [m0] ; C8450901 v_interp_p1_f32 v18, v0, 0, 2, [m0] ; C8480800 v_interp_p2_f32 v18, [v18], v1, 0, 2, [m0] ; C8490801 v_mul_f32_e32 v19, v18, v18 ; 10262512 v_mad_f32 v19, v17, v17, v19 ; D2820013 044E2311 v_interp_p1_f32 v20, v0, 2, 2, [m0] ; C8500A00 v_interp_p2_f32 v20, [v20], v1, 2, 2, [m0] ; C8510A01 v_mad_f32 v19, v20, v20, v19 ; D2820013 044E2914 v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mul_f32_e32 v21, v19, v20 ; 102A2913 v_mul_f32_e32 v22, v14, v21 ; 102C2B0E v_mul_f32_e32 v23, v19, v18 ; 102E2513 v_mul_f32_e32 v24, v16, v23 ; 10302F10 v_mul_f32_e32 v25, v19, v17 ; 10322313 v_mad_f32 v24, v25, v15, v24 ; D2820018 04621F19 v_mad_f32 v21, v21, v9, v24 ; D2820015 04621315 v_add_f32_e32 v24, v21, v21 ; 06302B15 v_mad_f32 v12, v24, v9, -v22 ; D282000C 845A1318 v_mul_f32_e32 v22, v14, v25 ; 102C330E v_mad_f32 v11, v24, v15, -v22 ; D282000B 845A1F18 v_mul_f32_e32 v14, v14, v23 ; 101C2F0E v_mad_f32 v10, v24, v16, -v14 ; D282000A 843A2118 v_cubeid_f32 v28, v10, v11, v12 ; D288001C 0432170A v_cubema_f32 v27, v10, v11, v12 ; D28E001B 0432170A v_cubesc_f32 v26, v10, v11, v12 ; D28A001A 0432170A v_cubetc_f32 v25, v10, v11, v12 ; D28C0019 0432170A v_rcp_f32_e64 v10, |v27| ; D354010A 0000011B v_mov_b32_e32 v11, 0x3fc00000 ; 7E1602FF 3FC00000 v_mad_f32 v27, v25, v10, v11 ; D282001B 042E1519 v_mad_f32 v26, v26, v10, v11 ; D282001A 042E151A image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[88:95], s[84:87] ; F0800700 02B60A1A s_buffer_load_dword s4, s[0:3], 0x7a ; C202017A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v13, s4, v11 ; 101A1604 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s5, v13 ; 101A1A05 v_mul_f32_e32 v14, v15, v24 ; 101C310F v_mad_f32 v14, v17, v19, -v14 ; D282000E 843A2711 v_mul_f32_e32 v17, v16, v24 ; 10223110 v_mad_f32 v17, v18, v19, -v17 ; D2820011 84462712 s_buffer_load_dword s6, s[0:3], 0x1c ; C203011C s_buffer_load_dword s7, s[0:3], 0x1d ; C203811D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v18, s7, s7 ; D2100012 00000E07 v_mad_f32 v18, s6, s6, v18 ; D2820012 04480C06 s_buffer_load_dword s8, s[0:3], 0x1e ; C204011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v18, s8, s8, v18 ; D2820012 04481008 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mul_f32_e64 v22, -s6, v18 ; D2100016 20022406 v_mul_f32_e32 v17, v17, v22 ; 10222D11 v_mul_f32_e64 v23, -s7, v18 ; D2100017 20022407 v_mad_f32 v14, v23, -v14, -v17 ; D282000E C4461D17 v_mul_f32_e32 v17, v9, v24 ; 10223109 v_mad_f32 v17, v20, v19, -v17 ; D2820011 84462714 v_mul_f32_e64 v18, -s8, v18 ; D2100012 20022408 v_mad_f32 v14, -v18, v17, v14 ; D282000E 243A2312 v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 v_mov_b32_e32 v17, 0xbc23d70a ; 7E2202FF BC23D70A v_add_f32_e32 v17, v14, v17 ; 0622230E v_cmp_ge_f32_e64 s[10:11], v17, 0 ; D00C000A 00010111 v_cndmask_b32_e64 v17, 0, -1, s[10:11] ; D2000811 00298280 v_cmp_ne_i32_e64 s[10:11], v17, 0 ; D10A000A 00010111 v_mov_b32_e32 v17, 0x3c23d70a ; 7E2202FF 3C23D70A v_cndmask_b32_e64 v14, v17, v14, s[10:11] ; D200000E 082A1D11 v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 v_log_f32_e32 v14, v14 ; 7E1C4F0E image_sample v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[76:83], s[72:75] ; F0800F00 02531807 s_buffer_load_dword s9, s[0:3], 0x6f ; C204816F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_max_f32_e32 v17, s9, v27 ; 20223609 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v17, s9, v17 ; 10222209 v_mul_legacy_f32_e32 v14, v17, v14 ; 0E1C1D11 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v17, v22, v16 ; 10222116 v_mad_f32 v17, v15, v23, v17 ; D2820011 04462F0F v_mad_f32 v17, v9, v18, v17 ; D2820011 04462509 v_add_f32_e64 v18, 0, v17 clamp ; D2060812 00022280 v_mul_f32_e32 v14, v14, v18 ; 101C250E v_interp_p1_f32 v18, v0, 0, 8, [m0] ; C8482000 v_interp_p2_f32 v18, [v18], v1, 0, 8, [m0] ; C8492001 v_mov_b32_e32 v19, 0xba800000 ; 7E2602FF BA800000 v_add_f32_e32 v29, v19, v18 ; 063A2513 v_interp_p1_f32 v20, v0, 2, 8, [m0] ; C8502200 v_interp_p2_f32 v20, [v20], v1, 2, 8, [m0] ; C8512201 v_add_f32_e32 v28, 0, v20 ; 06382880 v_interp_p1_f32 v20, v0, 1, 8, [m0] ; C8502100 v_interp_p2_f32 v20, [v20], v1, 1, 8, [m0] ; C8512101 v_add_f32_e32 v30, 0, v20 ; 063C2880 s_load_dwordx8 s[12:19], vcc, 0x50 ; C0C66B50 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_c v22, 1, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[12:19], s[68:71] ; F0A00100 0223161C v_mov_b32_e32 v23, 0x3dfd1fa4 ; 7E2E02FF 3DFD1FA4 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v22, 0x3dfd1fa4, v22 ; 102C2CFF 3DFD1FA4 v_add_f32_e32 v32, 0x3a800000, v18 ; 064024FF 3A800000 v_mov_b32_e32 v33, v28 ; 7E42031C v_mov_b32_e32 v34, v29 ; 7E44031D v_mov_b32_e32 v35, v30 ; 7E46031E v_mov_b32_e32 v36, v31 ; 7E48031F v_mov_b32_e32 v34, v32 ; 7E440320 v_mov_b32_e32 v35, v30 ; 7E46031E image_sample_c v32, 1, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[12:19], s[68:71] ; F0A00100 02232021 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, v32, v23, v22 ; D2820016 045A2F20 v_add_f32_e32 v18, 0, v18 ; 06242480 v_mov_b32_e32 v37, v28 ; 7E4A031C v_mov_b32_e32 v38, v29 ; 7E4C031D v_mov_b32_e32 v39, v30 ; 7E4E031E v_mov_b32_e32 v40, v31 ; 7E50031F v_mov_b32_e32 v38, v18 ; 7E4C0312 v_add_f32_e32 v18, v19, v20 ; 06242913 v_mov_b32_e32 v39, v18 ; 7E4E0312 image_sample_c v19, 1, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[12:19], s[68:71] ; F0A00100 02231325 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, v19, v23, v22 ; D2820013 045A2F13 v_add_f32_e32 v39, 0x3a800000, v20 ; 064E28FF 3A800000 image_sample_c v20, 1, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[12:19], s[68:71] ; F0A00100 02231425 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, v20, v23, v19 ; D2820013 044E2F14 v_mov_b32_e32 v41, v28 ; 7E52031C v_mov_b32_e32 v42, v29 ; 7E54031D v_mov_b32_e32 v43, v30 ; 7E56031E v_mov_b32_e32 v44, v31 ; 7E58031F v_mov_b32_e32 v43, v39 ; 7E560327 image_sample_c v20, 1, 0, 0, 0, 0, 0, 0, 0, v[41:44], s[12:19], s[68:71] ; F0A00100 02231429 v_mov_b32_e32 v22, 0x3d996882 ; 7E2C02FF 3D996882 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v20, 0x3d996882, v20 ; 102828FF 3D996882 v_mov_b32_e32 v35, v39 ; 7E460327 image_sample_c v23, 1, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[12:19], s[68:71] ; F0A00100 02231721 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v23, v22, v20 ; D2820014 04522D17 v_mov_b32_e32 v35, v18 ; 7E460312 image_sample_c v23, 1, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[12:19], s[68:71] ; F0A00100 02231721 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v23, v22, v20 ; D2820014 04522D17 v_mov_b32_e32 v32, v28 ; 7E40031C v_mov_b32_e32 v33, v29 ; 7E42031D v_mov_b32_e32 v34, v30 ; 7E44031E v_mov_b32_e32 v35, v31 ; 7E46031F v_mov_b32_e32 v34, v18 ; 7E440312 image_sample_c v18, 1, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[12:19], s[68:71] ; F0A00100 02231220 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, v18, v22, v20 ; D2820012 04522D12 v_add_f32_e32 v18, v19, v18 ; 06242513 v_mov_b32_e32 v39, v30 ; 7E4E031E image_sample_c v19, 1, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[12:19], s[68:71] ; F0A00100 02231325 v_mov_b32_e32 v20, 0x3e52efb3 ; 7E2802FF 3E52EFB3 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, v20, v19, v18 ; D2820012 044A2714 v_interp_p1_f32 v19, v0, 1, 4, [m0] ; C84C1100 v_interp_p2_f32 v19, [v19], v1, 1, 4, [m0] ; C84D1101 v_interp_p1_f32 v20, v0, 2, 4, [m0] ; C8501200 v_interp_p2_f32 v20, [v20], v1, 2, 4, [m0] ; C8511201 v_mov_b32_e32 v22, 0xc69c4000 ; 7E2C02FF C69C4000 v_add_f32_e32 v20, v20, v22 ; 06282D14 v_rcp_f32_e32 v22, s8 ; 7E2C5408 v_mul_f32_e64 v20, -v20, v22 ; D2100014 20022D14 v_mad_f32 v19, s7, v20, v19 ; D2820013 044E2807 s_buffer_load_dword s7, s[0:3], 0x1b ; C203811B s_buffer_load_dword s8, s[0:3], 0x5b ; C204015B s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v22, s8 ; 7E2C5408 v_mad_f32 v29, v19, v22, s7 ; D282001D 001E2D13 v_interp_p1_f32 v23, v0, 0, 4, [m0] ; C85C1000 v_interp_p2_f32 v23, [v23], v1, 0, 4, [m0] ; C85D1001 v_mad_f32 v20, s6, v20, v23 ; D2820014 045E2806 s_buffer_load_dword s6, s[0:3], 0x1a ; C203011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v28, v20, v22, s6 ; D282001C 001A2D14 s_load_dwordx8 s[8:15], vcc, 0x48 ; C0C46B48 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v23, 2, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[8:15], s[64:67] ; F0800200 0202171C s_buffer_load_dword s6, s[0:3], 0x19 ; C2030119 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v29, v19, v22, s6 ; D282001D 001A2D13 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v28, v20, v22, s6 ; D282001C 001A2D14 image_sample v19, 1, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[8:15], s[64:67] ; F0800100 0202131C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v20, v23, v19 ; 10282717 s_buffer_load_dword s6, s[0:3], 0x71 ; C2030171 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v22, s6, v20 ; 102C2806 v_mul_f32_e32 v22, v22, v18 ; 102C2516 v_mul_f32_e32 v28, v14, v22 ; 10382D0E s_buffer_load_dword s8, s[0:3], 0x3 ; C2040103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v28, s8, v13 ; D282000D 0434111C s_buffer_load_dword s6, s[0:3], 0x6c ; C203016C s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v28, s6, v24 ; 20383006 v_mul_f32_e32 v13, v13, v28 ; 101A390D s_buffer_load_dword s6, s[0:3], 0x6e ; C203016E s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v29, s6, v26 ; 203A3406 v_sub_f32_e32 v30, 1.0, v29 ; 083C3AF2 v_mul_f32_e32 v31, v4, v30 ; 103E3D04 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v31, v29, s6, v31 ; D282001F 047C0D1D v_mul_f32_e32 v13, v31, v13 ; 101A1B1F v_add_f32_e64 v31, 0, v21 clamp ; D206081F 00022A80 v_mov_b32_e32 v32, 0.5 ; 7E4002F0 s_load_dwordx8 s[12:19], vcc, 0x40 ; C0C66B40 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[12:19], s[52:55] ; F0800700 01A31F1F image_sample v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[56:63], s[48:51] ; F0800F00 018E2207 s_buffer_load_dword s6, s[0:3], 0x6a ; C203016A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_max_f32_e32 v7, s6, v36 ; 200E4806 v_max_f32_e32 v8, v7, v33 ; 20104307 v_mul_f32_e32 v21, v8, v13 ; 102A1B08 s_buffer_load_dword s6, s[0:3], 0x58 ; C2030158 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v16, s6, v16 ; 10202006 s_buffer_load_dword s6, s[0:3], 0x59 ; C2030159 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v15, s6, v15, v16 ; D282000F 04421E06 s_buffer_load_dword s6, s[0:3], 0x5a ; C203015A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v15, s6, v9, v15 ; D282000F 043E1206 v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 s_buffer_load_dword s6, s[0:3], 0x51 ; C2030151 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v16, s6, v15 ; 10201E06 v_mad_f32 v19, -v19, v23, 1.0 ; D2820013 23CA2F13 v_sub_f32_e32 v23, 1.0, v18 ; 082E24F2 v_max_f32_e32 v19, v19, v23 ; 20262F13 s_buffer_load_dword s6, s[0:3], 0x55 ; C2030155 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v23, s6, v19 ; 102E2606 v_interp_p1_f32 v38, v0, 0, 1, [m0] ; C8980400 v_interp_p2_f32 v38, [v38], v1, 0, 1, [m0] ; C8990401 v_mul_f32_e32 v23, v38, v23 ; 102E2F26 v_mad_f32 v16, v16, v38, v23 ; D2820010 045E4D10 v_mad_f32 v17, 0.5, v17, 0.5 ; D2820011 03C222F0 v_interp_p1_f32 v23, v0, 3, 8, [m0] ; C85C2300 v_interp_p2_f32 v23, [v23], v1, 3, 8, [m0] ; C85D2301 v_cmp_ge_f32_e64 s[6:7], -v23, 0 ; D00C0006 20010117 v_cndmask_b32_e64 v23, 0, -1, s[6:7] ; D2000817 00198280 v_cmp_ne_i32_e64 s[6:7], v23, 0 ; D10A0006 00010117 v_cndmask_b32_e64 v23, 1.0, 0, s[6:7] ; D2000817 101900F2 v_mul_f32_e32 v23, v23, v18 ; 102E2517 v_mul_f32_e32 v17, v17, v23 ; 10222F11 v_mad_f32 v16, v17, v22, v16 ; D2820010 04422D11 s_buffer_load_dword s6, s[0:3], 0xd ; C203010D s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v22, s6, v35 ; 202C4606 v_mul_f32_e32 v22, v22, v32 ; 102C4116 v_interp_p1_f32 v40, v0, 3, 2, [m0] ; C8A00B00 v_interp_p2_f32 v40, [v40], v1, 3, 2, [m0] ; C8A10B01 v_interp_p1_f32 v39, v0, 3, 1, [m0] ; C89C0700 v_interp_p2_f32 v39, [v39], v1, 3, 1, [m0] ; C89D0701 image_sample v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[39:40], s[40:47], s[36:39] ; F0800700 012A2727 s_buffer_load_dword s6, s[0:3], 0x22 ; C2030122 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v23, s6, v41 ; 102E5206 v_mad_f32 v23, 2.0, v23, -1.0 ; D2820017 03CE2EF4 s_buffer_load_dword s6, s[0:3], 0x68 ; C2030168 s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v42, s6, v34 ; 20544406 s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v42, s6, v42 ; 10545406 v_mad_f32 v23, v42, v23, 1.0 ; D2820017 03CA2F2A v_mul_f32_e32 v43, v23, v5 ; 10560B17 v_log_f32_e32 v44, v43 ; 7E584F2B v_mul_f32_e32 v44, 0x3ee8b439, v44 ; 105858FF 3EE8B439 v_exp_f32_e32 v46, v44 ; 7E5C4B2C s_buffer_load_dword s6, s[0:3], 0x21 ; C2030121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v48, s6, v40 ; 10605006 v_mad_f32 v48, 2.0, v48, -1.0 ; D2820030 03CE60F4 v_mad_f32 v48, v42, v48, 1.0 ; D2820030 03CA612A v_mul_f32_e32 v49, v48, v4 ; 10620930 v_log_f32_e32 v50, v49 ; 7E644F31 v_mul_f32_e32 v50, 0x3ee8b439, v50 ; 106464FF 3EE8B439 v_exp_f32_e32 v45, v50 ; 7E5A4B32 s_buffer_load_dword s6, s[0:3], 0x20 ; C2030120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v39, s6, v39 ; 104E4E06 v_mad_f32 v39, 2.0, v39, -1.0 ; D2820027 03CE4EF4 v_mad_f32 v39, v42, v39, 1.0 ; D2820027 03CA4F2A v_mul_f32_e32 v40, v39, v3 ; 10500727 v_log_f32_e32 v41, v40 ; 7E524F28 v_mul_f32_e32 v41, 0x3ee8b439, v41 ; 105252FF 3EE8B439 v_exp_f32_e32 v44, v41 ; 7E584B29 image_sample v[44:46], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[28:35], s[24:27] ; F0800700 00C72C2C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v41, -v4, v48, v45 ; D2820029 24B66104 v_mad_f32 v41, v22, v41, v49 ; D2820029 04C65316 v_interp_p1_f32 v48, v0, 3, 0, [m0] ; C8C00300 v_interp_p2_f32 v48, [v48], v1, 3, 0, [m0] ; C8C10301 v_interp_p1_f32 v47, v0, 2, 0, [m0] ; C8BC0200 v_interp_p2_f32 v47, [v47], v1, 2, 0, [m0] ; C8BD0201 v_readlane_b32 s12, v51, 0 ; 02190133 v_readlane_b32 s13, v51, 1 ; 021B0333 v_readlane_b32 s14, v51, 2 ; 021D0533 v_readlane_b32 s15, v51, 3 ; 021F0733 s_nop 2 ; BF800002 v_readlane_b32 s16, v51, 4 ; 02210933 v_readlane_b32 s17, v51, 5 ; 02230B33 v_readlane_b32 s18, v51, 6 ; 02250D33 v_readlane_b32 s19, v51, 7 ; 02270F33 v_readlane_b32 s20, v51, 8 ; 02291133 v_readlane_b32 s21, v51, 9 ; 022B1333 v_readlane_b32 s22, v51, 10 ; 022D1533 v_readlane_b32 s23, v51, 11 ; 022F1733 s_nop 2 ; BF800002 image_sample v[47:49], 7, 0, 0, 0, 0, 0, 0, 0, v[47:48], s[16:23], s[12:15] ; F0800700 00642F2F s_buffer_load_dword s6, s[0:3], 0xf ; C203010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v41, v48, s6, v41 ; D2820029 04A40D30 v_mad_f32 v16, v16, v41, v21 ; D2820010 04565310 v_mad_f32 v13, v13, v8, -v16 ; D282000D 8442110D v_mad_f32 v13, v7, v13, v16 ; D282000D 04421B07 v_max_f32_e32 v9, 0, v9 ; 20121280 s_buffer_load_dword s7, s[0:3], 0x6d ; C203816D s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v16, s7, v25 ; 20203207 v_interp_p1_f32 v21, v0, 1, 3, [m0] ; C8540D00 v_interp_p2_f32 v21, [v21], v1, 1, 3, [m0] ; C8550D01 v_mul_f32_e32 v21, v21, v16 ; 102A2115 v_mul_f32_e32 v21, v21, v9 ; 102A1315 v_mad_f32 v13, v21, v31, v13 ; D282000D 04363F15 s_buffer_load_dword s7, s[0:3], 0x6b ; C203816B s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v21, s7, v37 ; 202A4A07 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_sub_f32_e32 v24, 1.0, v21 ; 08302AF2 v_mul_f32_e32 v13, v13, v24 ; 101A310D v_mad_f32 v13, v21, v41, v13 ; D282000D 04365315 s_buffer_load_dword s7, s[0:3], 0x75 ; C2038175 s_buffer_load_dword s9, s[0:3], 0x78 ; C2048178 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v25, s7 ; 7E320207 v_mad_f32 v25, -v13, s9, v25 ; D2820019 2464130D v_mul_f32_e32 v13, s9, v13 ; 101A1A09 v_interp_p1_f32 v26, v0, 3, 3, [m0] ; C8680F00 v_interp_p2_f32 v26, [v26], v1, 3, 3, [m0] ; C8690F01 v_mul_f32_e32 v26, v26, v26 ; 1034351A v_mad_f32 v13, v26, v25, v13 ; D282000D 0436331A v_mul_f32_e32 v25, s4, v10 ; 10321404 v_mul_f32_e32 v25, s5, v25 ; 10323205 s_buffer_load_dword s7, s[0:3], 0x70 ; C2038170 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v27, s7, v20 ; 10362807 v_mul_f32_e32 v27, v27, v18 ; 1036251B v_mul_f32_e32 v34, v14, v27 ; 1044370E v_mad_f32 v25, v34, s8, v25 ; D2820019 04641122 v_mul_f32_e32 v25, v25, v28 ; 10323919 v_mul_f32_e32 v34, v3, v30 ; 10443D03 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v34, v29, s7, v34 ; D2820022 04880F1D v_mul_f32_e32 v25, v34, v25 ; 10323322 v_mul_f32_e32 v34, v8, v25 ; 10443308 s_buffer_load_dword s7, s[0:3], 0x50 ; C2038150 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v35, s7, v15 ; 10461E07 s_buffer_load_dword s7, s[0:3], 0x54 ; C2038154 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v36, s7, v19 ; 10482607 v_mul_f32_e32 v36, v38, v36 ; 10484926 v_mad_f32 v35, v35, v38, v36 ; D2820023 04924D23 v_mad_f32 v27, v17, v27, v35 ; D282001B 048E3711 v_mad_f32 v35, -v3, v39, v44 ; D2820023 24B24F03 v_mad_f32 v35, v22, v35, v40 ; D2820023 04A24716 v_mad_f32 v35, v47, s6, v35 ; D2820023 048C0D2F v_mad_f32 v27, v27, v35, v34 ; D282001B 048A471B v_mad_f32 v25, v25, v8, -v27 ; D2820019 846E1119 v_mad_f32 v25, v7, v25, v27 ; D2820019 046E3307 v_interp_p1_f32 v27, v0, 0, 3, [m0] ; C86C0C00 v_interp_p2_f32 v27, [v27], v1, 0, 3, [m0] ; C86D0C01 v_mul_f32_e32 v27, v27, v16 ; 1036211B v_mul_f32_e32 v27, v27, v9 ; 1036131B v_mad_f32 v25, v27, v31, v25 ; D2820019 04663F1B v_mul_f32_e32 v25, v25, v24 ; 10323119 v_mad_f32 v25, v21, v35, v25 ; D2820019 04664715 s_buffer_load_dword s7, s[0:3], 0x74 ; C2038174 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v27, s7 ; 7E360207 v_mad_f32 v27, -v25, s9, v27 ; D282001B 246C1319 v_mul_f32_e32 v25, s9, v25 ; 10323209 v_mad_f32 v25, v26, v27, v25 ; D2820019 0466371A v_cvt_pkrtz_f16_f32_e32 v13, v25, v13 ; 5E1A1B19 v_mul_f32_e32 v10, s4, v12 ; 10141804 v_mul_f32_e32 v10, s5, v10 ; 10141405 s_buffer_load_dword s4, s[0:3], 0x72 ; C2020172 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s4, v20 ; 10162804 v_mul_f32_e32 v11, v11, v18 ; 1016250B v_mul_f32_e32 v12, v14, v11 ; 1018170E v_mad_f32 v10, v12, s8, v10 ; D282000A 0428110C v_mul_f32_e32 v10, v10, v28 ; 1014390A v_mul_f32_e32 v12, v5, v30 ; 10183D05 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v12, v29, s4, v12 ; D282000C 0430091D v_mul_f32_e32 v10, v12, v10 ; 1014150C v_mul_f32_e32 v12, v8, v10 ; 10181508 s_buffer_load_dword s4, s[0:3], 0x52 ; C2020152 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v14, s4, v15 ; 101C1E04 s_buffer_load_dword s4, s[0:3], 0x56 ; C2020156 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v15, s4, v19 ; 101E2604 v_mul_f32_e32 v15, v38, v15 ; 101E1F26 v_mad_f32 v14, v14, v38, v15 ; D282000E 043E4D0E v_mad_f32 v11, v17, v11, v14 ; D282000B 043A1711 v_mad_f32 v3, -v5, v23, v46 ; D2820003 24BA2F05 v_mad_f32 v3, v22, v3, v43 ; D2820003 04AE0716 v_mad_f32 v3, v49, s6, v3 ; D2820003 040C0D31 v_mad_f32 v4, v11, v3, v12 ; D2820004 0432070B v_mad_f32 v5, v10, v8, -v4 ; D2820005 8412110A v_mad_f32 v4, v7, v5, v4 ; D2820004 04120B07 v_interp_p1_f32 v5, v0, 2, 3, [m0] ; C8140E00 v_interp_p2_f32 v5, [v5], v1, 2, 3, [m0] ; C8150E01 v_mul_f32_e32 v0, v5, v16 ; 10002105 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mad_f32 v0, v0, v31, v4 ; D2820000 04123F00 v_mul_f32_e32 v0, v0, v24 ; 10003100 v_mad_f32 v0, v21, v3, v0 ; D2820000 04020715 s_buffer_load_dword s0, s[0:3], 0x76 ; C2000176 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s0 ; 7E020200 v_mad_f32 v1, -v0, s9, v1 ; D2820001 24041300 v_mul_f32_e32 v0, s9, v0 ; 10000009 v_mad_f32 v0, v26, v1, v0 ; D2820000 0402031A v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v13, v0, v13, v0 ; F8001C0F 000D000D s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], CLIPVERTEX DCL OUT[2], GENERIC[9] DCL CONST[0..70] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 2.2000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: DP4 TEMP[0].x, IN[3], CONST[48] 2: DP4 TEMP[1].x, IN[3], CONST[49] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MAD TEMP[1].xyz, IN[4].xyzz, CONST[66].xxxx, IN[0].xyzz 5: MOV TEMP[1].w, IN[0].wwww 6: DP4 TEMP[2].x, TEMP[1], CONST[67] 7: DP4 TEMP[3].x, TEMP[1], CONST[68] 8: MOV TEMP[2].y, TEMP[3].xxxx 9: DP4 TEMP[1].x, TEMP[1], CONST[69] 10: MOV TEMP[2].z, TEMP[1].xxxx 11: MOV TEMP[2].w, CONST[0].yyyy 12: DP4 TEMP[1].x, TEMP[2], CONST[8] 13: DP4 TEMP[3].x, TEMP[2], CONST[9] 14: MOV TEMP[1].y, TEMP[3].xxxx 15: DP4 TEMP[4].x, TEMP[2], CONST[10] 16: MOV TEMP[1].z, TEMP[4].xxxx 17: DP4 TEMP[2].x, TEMP[2], CONST[11] 18: MOV TEMP[1].w, TEMP[2].xxxx 19: MOV TEMP[5], TEMP[1] 20: MAD TEMP[4].x, TEMP[4].xxxx, CONST[0].zzzz, -TEMP[2].xxxx 21: MOV TEMP[1].z, TEMP[4].xxxx 22: MOV TEMP[1].y, -TEMP[3].xxxx 23: MAD TEMP[1].xy, CONST[70].xyyy, TEMP[2].xxxx, TEMP[1].xyyy 24: MOV OUT[2], TEMP[0] 25: MOV OUT[0], TEMP[1] 26: MOV OUT[1], TEMP[5] 27: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 768) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 772) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 776) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 780) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 784) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 788) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 792) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 796) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1056) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1072) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1076) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1080) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1084) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1088) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1092) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1096) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1100) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1104) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1108) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1112) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1116) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1120) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 1124) %54 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = extractelement <4 x float> %65, i32 3 %70 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0 %72 = add i32 %5, %7 %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %72) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = fmul float %66, %31 %78 = fmul float %67, %32 %79 = fadd float %77, %78 %80 = fmul float %68, %33 %81 = fadd float %79, %80 %82 = fmul float %69, %34 %83 = fadd float %81, %82 %84 = fmul float %66, %35 %85 = fmul float %67, %36 %86 = fadd float %84, %85 %87 = fmul float %68, %37 %88 = fadd float %86, %87 %89 = fmul float %69, %38 %90 = fadd float %88, %89 %91 = fmul float %74, %39 %92 = fadd float %91, %58 %93 = fmul float %75, %39 %94 = fadd float %93, %59 %95 = fmul float %76, %39 %96 = fadd float %95, %60 %97 = fmul float %92, %40 %98 = fmul float %94, %41 %99 = fadd float %97, %98 %100 = fmul float %96, %42 %101 = fadd float %99, %100 %102 = fmul float %61, %43 %103 = fadd float %101, %102 %104 = fmul float %92, %44 %105 = fmul float %94, %45 %106 = fadd float %104, %105 %107 = fmul float %96, %46 %108 = fadd float %106, %107 %109 = fmul float %61, %47 %110 = fadd float %108, %109 %111 = fmul float %92, %48 %112 = fmul float %94, %49 %113 = fadd float %111, %112 %114 = fmul float %96, %50 %115 = fadd float %113, %114 %116 = fmul float %61, %51 %117 = fadd float %115, %116 %118 = fmul float %103, %15 %119 = fmul float %110, %16 %120 = fadd float %118, %119 %121 = fmul float %117, %17 %122 = fadd float %120, %121 %123 = fmul float %13, %18 %124 = fadd float %122, %123 %125 = fmul float %103, %19 %126 = fmul float %110, %20 %127 = fadd float %125, %126 %128 = fmul float %117, %21 %129 = fadd float %127, %128 %130 = fmul float %13, %22 %131 = fadd float %129, %130 %132 = fmul float %103, %23 %133 = fmul float %110, %24 %134 = fadd float %132, %133 %135 = fmul float %117, %25 %136 = fadd float %134, %135 %137 = fmul float %13, %26 %138 = fadd float %136, %137 %139 = fmul float %103, %27 %140 = fmul float %110, %28 %141 = fadd float %139, %140 %142 = fmul float %117, %29 %143 = fadd float %141, %142 %144 = fmul float %13, %30 %145 = fadd float %143, %144 %146 = fsub float -0.000000e+00, %145 %147 = fmul float %138, %14 %148 = fadd float %147, %146 %149 = fsub float -0.000000e+00, %131 %150 = fmul float %52, %145 %151 = fadd float %150, %124 %152 = fmul float %53, %145 %153 = fadd float %152, %149 %154 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 16 %155 = load <16 x i8> addrspace(2)* %154, !tbaa !0 %156 = call float @llvm.SI.load.const(<16 x i8> %155, i32 0) %157 = fmul float %156, %124 %158 = call float @llvm.SI.load.const(<16 x i8> %155, i32 4) %159 = fmul float %158, %131 %160 = fadd float %157, %159 %161 = call float @llvm.SI.load.const(<16 x i8> %155, i32 8) %162 = fmul float %161, %138 %163 = fadd float %160, %162 %164 = call float @llvm.SI.load.const(<16 x i8> %155, i32 12) %165 = fmul float %164, %145 %166 = fadd float %163, %165 %167 = call float @llvm.SI.load.const(<16 x i8> %155, i32 16) %168 = fmul float %167, %124 %169 = call float @llvm.SI.load.const(<16 x i8> %155, i32 20) %170 = fmul float %169, %131 %171 = fadd float %168, %170 %172 = call float @llvm.SI.load.const(<16 x i8> %155, i32 24) %173 = fmul float %172, %138 %174 = fadd float %171, %173 %175 = call float @llvm.SI.load.const(<16 x i8> %155, i32 28) %176 = fmul float %175, %145 %177 = fadd float %174, %176 %178 = call float @llvm.SI.load.const(<16 x i8> %155, i32 32) %179 = fmul float %178, %124 %180 = call float @llvm.SI.load.const(<16 x i8> %155, i32 36) %181 = fmul float %180, %131 %182 = fadd float %179, %181 %183 = call float @llvm.SI.load.const(<16 x i8> %155, i32 40) %184 = fmul float %183, %138 %185 = fadd float %182, %184 %186 = call float @llvm.SI.load.const(<16 x i8> %155, i32 44) %187 = fmul float %186, %145 %188 = fadd float %185, %187 %189 = call float @llvm.SI.load.const(<16 x i8> %155, i32 48) %190 = fmul float %189, %124 %191 = call float @llvm.SI.load.const(<16 x i8> %155, i32 52) %192 = fmul float %191, %131 %193 = fadd float %190, %192 %194 = call float @llvm.SI.load.const(<16 x i8> %155, i32 56) %195 = fmul float %194, %138 %196 = fadd float %193, %195 %197 = call float @llvm.SI.load.const(<16 x i8> %155, i32 60) %198 = fmul float %197, %145 %199 = fadd float %196, %198 %200 = call float @llvm.SI.load.const(<16 x i8> %155, i32 64) %201 = fmul float %200, %124 %202 = call float @llvm.SI.load.const(<16 x i8> %155, i32 68) %203 = fmul float %202, %131 %204 = fadd float %201, %203 %205 = call float @llvm.SI.load.const(<16 x i8> %155, i32 72) %206 = fmul float %205, %138 %207 = fadd float %204, %206 %208 = call float @llvm.SI.load.const(<16 x i8> %155, i32 76) %209 = fmul float %208, %145 %210 = fadd float %207, %209 %211 = call float @llvm.SI.load.const(<16 x i8> %155, i32 80) %212 = fmul float %211, %124 %213 = call float @llvm.SI.load.const(<16 x i8> %155, i32 84) %214 = fmul float %213, %131 %215 = fadd float %212, %214 %216 = call float @llvm.SI.load.const(<16 x i8> %155, i32 88) %217 = fmul float %216, %138 %218 = fadd float %215, %217 %219 = call float @llvm.SI.load.const(<16 x i8> %155, i32 92) %220 = fmul float %219, %145 %221 = fadd float %218, %220 %222 = call float @llvm.SI.load.const(<16 x i8> %155, i32 96) %223 = fmul float %222, %124 %224 = call float @llvm.SI.load.const(<16 x i8> %155, i32 100) %225 = fmul float %224, %131 %226 = fadd float %223, %225 %227 = call float @llvm.SI.load.const(<16 x i8> %155, i32 104) %228 = fmul float %227, %138 %229 = fadd float %226, %228 %230 = call float @llvm.SI.load.const(<16 x i8> %155, i32 108) %231 = fmul float %230, %145 %232 = fadd float %229, %231 %233 = call float @llvm.SI.load.const(<16 x i8> %155, i32 112) %234 = fmul float %233, %124 %235 = call float @llvm.SI.load.const(<16 x i8> %155, i32 116) %236 = fmul float %235, %131 %237 = fadd float %234, %236 %238 = call float @llvm.SI.load.const(<16 x i8> %155, i32 120) %239 = fmul float %238, %138 %240 = fadd float %237, %239 %241 = call float @llvm.SI.load.const(<16 x i8> %155, i32 124) %242 = fmul float %241, %145 %243 = fadd float %240, %242 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %83, float %90, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %151, float %153, float %148, float %145) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %166, float %177, float %188, float %199) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %210, float %221, float %232, float %243) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[4:7], s[8:9], 0xc ; C082090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s0, s[4:7], 0xc5 ; C20005C5 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s0, v2 ; 100A0400 s_buffer_load_dword s0, s[4:7], 0xc4 ; C20005C4 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s0, v5 ; D2820005 04140101 s_buffer_load_dword s0, s[4:7], 0xc6 ; C20005C6 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s0, v5 ; D2820005 04140103 s_buffer_load_dword s0, s[4:7], 0xc7 ; C20005C7 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s0, v5 ; D2820005 04140104 s_buffer_load_dword s0, s[4:7], 0xc1 ; C20005C1 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s0, v2 ; 100C0400 s_buffer_load_dword s0, s[4:7], 0xc0 ; C20005C0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s0, v6 ; D2820006 04180101 s_buffer_load_dword s0, s[4:7], 0xc2 ; C20005C2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s0, v6 ; D2820006 04180103 s_buffer_load_dword s0, s[4:7], 0xc3 ; C20005C3 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s0, v6 ; D2820001 04180104 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v1, v5, v2, v2 ; F800020F 02020501 s_movk_i32 s0, 0x420 ; B0000420 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v0, v5, s0, v1 ; D2820000 04040105 v_mad_f32 v9, v6, s0, v2 ; D2820009 04080106 s_movk_i32 s1, 0x434 ; B0010434 s_buffer_load_dword s1, s[4:7], s1 ; C2008401 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s1, v9 ; 10141201 s_movk_i32 s1, 0x430 ; B0010430 s_buffer_load_dword s1, s[4:7], s1 ; C2008401 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v0, s1, v10 ; D282000A 04280300 v_mad_f32 v5, v7, s0, v3 ; D2820005 040C0107 s_movk_i32 s0, 0x438 ; B0000438 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s0, v10 ; D2820006 04280105 s_movk_i32 s0, 0x43c ; B000043C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s0, v6 ; D2820006 04180104 s_movk_i32 s0, 0x444 ; B0000444 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v9 ; 100E1200 s_movk_i32 s0, 0x440 ; B0000440 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s0, v7 ; D2820007 041C0100 s_movk_i32 s0, 0x448 ; B0000448 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v5, s0, v7 ; D2820007 041C0105 s_movk_i32 s0, 0x44c ; B000044C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v4, s0, v7 ; D2820007 041C0104 s_buffer_load_dword s0, s[4:7], 0x2d ; C200052D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s0, v7 ; 10100E00 s_buffer_load_dword s0, s[4:7], 0x2c ; C200052C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v6, s0, v8 ; D2820008 04200106 s_movk_i32 s0, 0x454 ; B0000454 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s0, v9 ; 10121200 s_movk_i32 s0, 0x450 ; B0000450 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v9 ; D2820000 04240100 s_movk_i32 s0, 0x458 ; B0000458 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s0, v0 ; D2820000 04000105 s_movk_i32 s0, 0x45c ; B000045C s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s0, v0 ; D2820000 04000104 s_buffer_load_dword s0, s[4:7], 0x2e ; C200052E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s0, v8 ; D2820001 04200100 s_buffer_load_dword s0, s[4:7], 0x2f ; C200052F s_buffer_load_dword s1, s[4:7], 0x1 ; C2008501 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s0 ; 7E040200 v_mad_f32 v1, v2, s1, v1 ; D2820001 04040302 s_buffer_load_dword s0, s[4:7], 0x29 ; C2000529 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s0, v7 ; 10040E00 s_buffer_load_dword s0, s[4:7], 0x28 ; C2000528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v6, s0, v2 ; D2820002 04080106 s_buffer_load_dword s0, s[4:7], 0x2a ; C200052A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s0, v2 ; D2820002 04080100 s_buffer_load_dword s0, s[4:7], 0x2b ; C200052B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s0 ; 7E060200 v_mad_f32 v2, v3, s1, v2 ; D2820002 04080303 s_buffer_load_dword s0, s[4:7], 0x2 ; C2000502 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v2, s0, -v1 ; D2820003 84040102 s_buffer_load_dword s0, s[4:7], 0x25 ; C2000525 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s0, v7 ; 10080E00 s_buffer_load_dword s0, s[4:7], 0x24 ; C2000524 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s0, v4 ; D2820004 04100106 s_buffer_load_dword s0, s[4:7], 0x26 ; C2000526 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s0, v4 ; D2820004 04100100 s_buffer_load_dword s0, s[4:7], 0x27 ; C2000527 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s0 ; 7E0A0200 v_mad_f32 v4, v5, s1, v4 ; D2820004 04100305 s_movk_i32 s0, 0x464 ; B0000464 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s0, v1, -v4 ; D2820005 84120200 s_buffer_load_dword s0, s[4:7], 0x21 ; C2000521 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v7 ; 100E0E00 s_buffer_load_dword s0, s[4:7], 0x20 ; C2000520 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v6, s0, v7 ; D2820006 041C0106 s_buffer_load_dword s0, s[4:7], 0x22 ; C2000522 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v6 ; D2820000 04180100 s_buffer_load_dword s0, s[4:7], 0x23 ; C2000523 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mad_f32 v0, v6, s1, v0 ; D2820000 04000306 s_movk_i32 s0, 0x460 ; B0000460 s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s0, v1, v0 ; D2820006 04020200 exp 15, 12, 0, 0, 0, v6, v5, v3, v1 ; F80000CF 01030506 s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, v3 ; D2820003 040E0404 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v1, v3 ; D2820003 040E0204 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v4 ; 100C0804 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v0, v6 ; D2820006 041A0004 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v4 ; 100E0804 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v0, v7 ; D2820007 041E0004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v2, v7 ; D2820007 041E0404 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v1, v7 ; D2820007 041E0204 exp 15, 13, 0, 0, 0, v7, v6, v5, v3 ; F80000DF 03050607 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v3, s4, v4 ; 10060804 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v0, v3 ; D2820003 040E0004 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, v3 ; D2820003 040E0404 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v1, v3 ; D2820003 040E0204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v4 ; 100A0804 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v0, v5 ; D2820005 04160004 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v2, v5 ; D2820005 04160404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v1, v5 ; D2820005 04160204 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v4 ; 100C0804 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v0, v6 ; D2820006 041A0004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v2, v6 ; D2820006 041A0404 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v0, v4 ; D2820000 04120004 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s4, v2, v0 ; D2820000 04020404 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s0, v1, v0 ; D2820000 04020200 exp 15, 14, 0, 1, 0, v0, v6, v5, v3 ; F80008EF 03050600 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..14] DCL TEMP[0..67], LOCAL IMM[0] FLT32 { 0.0000, -1.0000, 1.0000, 1.9632} IMM[1] FLT32 { 0.1250, 0.0417, 0.2500, -0.2500} IMM[2] FLT32 { 1.3333, 0.7500, -0.5000, 0.5000} IMM[3] INT32 {0, 16, 1, 0} IMM[4] FLT32 { 0.1111, 4.0000, 20.0000, 0.0000} 0: MOV TEMP[0].xyz, IMM[0].xyzx 1: MAD TEMP[1].xy, CONST[14].zwww, IMM[0].xyyy, IN[0].xyyy 2: MOV TEMP[2].xy, TEMP[1].xyyy 3: MOV TEMP[2].w, IMM[0].xxxx 4: TXL TEMP[2], TEMP[2], SAMP[0], 2D 5: MOV TEMP[3], TEMP[2] 6: MOV TEMP[1], TEMP[2] 7: MAD TEMP[4].xy, CONST[14].zwww, IMM[0].yxxx, IN[0].xyyy 8: MOV TEMP[5].xy, TEMP[4].xyyy 9: MOV TEMP[5].w, IMM[0].xxxx 10: TXL TEMP[5], TEMP[5], SAMP[0], 2D 11: MOV TEMP[6], TEMP[5] 12: MOV TEMP[4], TEMP[5] 13: MAD TEMP[7].xy, CONST[14].zwww, IMM[0].zxxx, IN[0].xyyy 14: MOV TEMP[3].xy, TEMP[7].xyyy 15: MOV TEMP[3].w, IMM[0].xxxx 16: TXL TEMP[3], TEMP[3], SAMP[0], 2D 17: MOV TEMP[8], TEMP[3] 18: MOV TEMP[7], TEMP[3] 19: MAD TEMP[9].xy, CONST[14].zwww, IMM[0].xzzz, IN[0].xyyy 20: MOV TEMP[10].xy, TEMP[9].xyyy 21: MOV TEMP[10].w, IMM[0].xxxx 22: TXL TEMP[10], TEMP[10], SAMP[0], 2D 23: MOV TEMP[11], TEMP[10] 24: MOV TEMP[9], TEMP[10] 25: MAD TEMP[12].x, TEMP[2].yyyy, IMM[0].wwww, TEMP[2].xxxx 26: MOV TEMP[0].w, TEMP[12].xxxx 27: MAD TEMP[13].x, TEMP[5].yyyy, IMM[0].wwww, TEMP[5].xxxx 28: MAD TEMP[6].x, TEMP[3].yyyy, IMM[0].wwww, TEMP[3].xxxx 29: MAD TEMP[14].x, TEMP[10].yyyy, IMM[0].wwww, TEMP[10].xxxx 30: MIN TEMP[15].x, TEMP[13].xxxx, TEMP[12].xxxx 31: ADD TEMP[16].x, -TEMP[6].xxxx, TEMP[14].xxxx 32: MOV TEMP[13].y, TEMP[16].xxxx 33: MOV TEMP[17].x, TEMP[13] 34: FSGE TEMP[8].x, TEMP[16].xxxx, IMM[0].xxxx 35: UIF TEMP[8].xxxx :0 36: MOV TEMP[8].x, TEMP[6].xxxx 37: ELSE :0 38: MOV TEMP[8].x, TEMP[14].xxxx 39: ENDIF 40: FSGE TEMP[16].x, TEMP[16].xxxx, IMM[0].xxxx 41: UIF TEMP[16].xxxx :0 42: MOV TEMP[16].x, TEMP[14].xxxx 43: ELSE :0 44: MOV TEMP[16].x, TEMP[6].xxxx 45: ENDIF 46: MOV TEMP[13].x, TEMP[13].xxxw 47: MIN TEMP[8].x, TEMP[8].xxxx, TEMP[15].xxxx 48: MAX TEMP[15].x, TEMP[12].xxxx, TEMP[17].xxxx 49: MAX TEMP[16].x, TEMP[15].xxxx, TEMP[16].xxxx 50: MOV TEMP[8].y, TEMP[16].xxxx 51: MIN TEMP[18].x, TEMP[5].wwww, TEMP[2].wwww 52: MIN TEMP[19].x, TEMP[3].wwww, TEMP[10].wwww 53: MIN TEMP[15].x, TEMP[19].xxxx, TEMP[18].xxxx 54: MAX TEMP[18].x, TEMP[2].wwww, TEMP[5].wwww 55: MAX TEMP[19].x, TEMP[10].wwww, TEMP[3].wwww 56: MAX TEMP[18].x, TEMP[18].xxxx, TEMP[19].xxxx 57: MUL TEMP[19].xyw, IMM[0].zzxx, IN[0].xyxx 58: MOV TEMP[20].xy, TEMP[19].xyyy 59: MOV TEMP[20].w, TEMP[19].wwww 60: TXL TEMP[20], TEMP[20], SAMP[0], 2D 61: MOV TEMP[21], TEMP[20] 62: MOV TEMP[19], TEMP[20] 63: MAX TEMP[18].x, TEMP[20].wwww, TEMP[18].xxxx 64: MOV TEMP[22].z, TEMP[18].xxxx 65: MIN TEMP[11].x, TEMP[15].xxxx, TEMP[20].wwww 66: ADD TEMP[11].x, -TEMP[11].xxxx, TEMP[18].xxxx 67: MOV TEMP[22].w, TEMP[11].xxxx 68: MAD TEMP[23].x, TEMP[20].yyyy, IMM[0].wwww, TEMP[20].xxxx 69: MOV TEMP[13].y, TEMP[23].xxxx 70: MIN TEMP[24].x, TEMP[8].xxxx, TEMP[23].xxxx 71: MAX TEMP[16].x, TEMP[23].xxxx, TEMP[16].xxxx 72: ADD TEMP[24].x, -TEMP[24].xxxx, TEMP[16].xxxx 73: MOV TEMP[6].w, TEMP[24].xxxx 74: ADD TEMP[25].x, -TEMP[11].xxxx, TEMP[24].xxxx 75: MOV TEMP[13].z, TEMP[25].xxxx 76: MOV TEMP[22].x, TEMP[3].wwww 77: MOV TEMP[22].y, TEMP[10].wwww 78: FSGE TEMP[26].x, TEMP[25].xxxx, IMM[0].xxxx 79: UIF TEMP[26].xxxx :0 80: MOV TEMP[26].x, TEMP[6].xxxx 81: ELSE :0 82: MOV TEMP[26].x, TEMP[3].wwww 83: ENDIF 84: MOV TEMP[17].x, TEMP[26].xxxx 85: FSGE TEMP[26].x, TEMP[25].xxxx, IMM[0].xxxx 86: UIF TEMP[26].xxxx :0 87: MOV TEMP[14].x, TEMP[14].xxxx 88: ELSE :0 89: MOV TEMP[14].x, TEMP[10].wwww 90: ENDIF 91: MOV TEMP[17].y, TEMP[14].xxxx 92: FSGE TEMP[14].x, TEMP[25].xxxx, IMM[0].xxxx 93: UIF TEMP[14].xxxx :0 94: MOV TEMP[14].x, TEMP[16].xxxx 95: ELSE :0 96: MOV TEMP[14].x, TEMP[18].xxxx 97: ENDIF 98: MOV TEMP[17].z, TEMP[14].xxxx 99: FSGE TEMP[16].x, TEMP[25].xxxx, IMM[0].xxxx 100: UIF TEMP[16].xxxx :0 101: MOV TEMP[16].x, TEMP[24].xxxx 102: ELSE :0 103: MOV TEMP[16].x, TEMP[11].xxxx 104: ENDIF 105: MOV TEMP[17].w, TEMP[16].xxxx 106: MOV TEMP[15], TEMP[17] 107: MUL TEMP[14].x, TEMP[14].xxxx, IMM[1].xxxx 108: MOV TEMP[13].w, TEMP[14].xxxx 109: MAX TEMP[14].x, IMM[1].yyyy, TEMP[14].xxxx 110: MOV TEMP[6].x, TEMP[14].xxxx 111: MUL TEMP[18].xy, IMM[0].xzzz, CONST[14].zwww 112: MOV TEMP[6].yz, TEMP[18].yxyy 113: FSGE TEMP[14].x, TEMP[16].xxxx, TEMP[14].xxxx 114: UIF TEMP[14].xxxx :0 115: ADD TEMP[8], TEMP[2], TEMP[5] 116: ADD TEMP[8], TEMP[20], TEMP[8] 117: ADD TEMP[7], TEMP[3], TEMP[8] 118: ADD TEMP[7], TEMP[10], TEMP[7] 119: MOV TEMP[17].xyz, TEMP[0] 120: FSGE TEMP[3].x, TEMP[25].xxxx, IMM[0].xxxx 121: UIF TEMP[3].xxxx :0 122: MOV TEMP[3].x, TEMP[12].xxxx 123: ELSE :0 124: MOV TEMP[3].x, TEMP[2].wwww 125: ENDIF 126: MOV TEMP[17].w, TEMP[3].xxxx 127: MOV TEMP[0], TEMP[17] 128: MOV TEMP[17].yzw, TEMP[2] 129: FSGE TEMP[2].x, TEMP[25].xxxx, IMM[0].xxxx 130: UIF TEMP[2].xxxx :0 131: MOV TEMP[2].x, TEMP[13].xxxx 132: ELSE :0 133: MOV TEMP[2].x, TEMP[5].wwww 134: ENDIF 135: MOV TEMP[17].x, TEMP[2].xxxx 136: MOV TEMP[1], TEMP[17] 137: FSGE TEMP[5].x, TEMP[25].xxxx, IMM[0].xxxx 138: UIF TEMP[5].xxxx :0 139: MOV TEMP[5].x, TEMP[23].xxxx 140: ELSE :0 141: MOV TEMP[5].x, TEMP[20].wwww 142: ENDIF 143: MOV TEMP[17].y, TEMP[5].xxxx 144: MOV TEMP[1].xy, TEMP[17].xyxx 145: RCP TEMP[3].x, TEMP[15].wwww 146: MOV TEMP[1].z, TEMP[3].xxxx 147: ADD TEMP[2].x, TEMP[0].wwww, TEMP[2].xxxx 148: MOV TEMP[1].w, TEMP[2].xxxx 149: ADD TEMP[2].x, TEMP[15].xxxx, TEMP[2].xxxx 150: MOV TEMP[1].w, TEMP[2].xxxx 151: ADD TEMP[2].x, TEMP[15].yyyy, TEMP[2].xxxx 152: MOV TEMP[1].w, TEMP[2].xxxx 153: MAD TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz, -TEMP[5].xxxx 154: MOV TEMP[1].w, TEMP[2].xxxx 155: ABS TEMP[2].x, TEMP[2].xxxx 156: MAD TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx, IMM[1].wwww 157: MOV TEMP[1].z, TEMP[2].xxxx 158: MAX TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 159: MUL TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx 160: MOV TEMP[1].z, TEMP[2].xxxx 161: MIN TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy 162: MOV TEMP[4].x, TEMP[2].xxxx 163: ADD TEMP[9].xy, -CONST[14].zwww, IN[0].xyyy 164: MOV TEMP[9].zw, IMM[0].xxxx 165: MOV TEMP[5].xy, TEMP[9].xyyy 166: MOV TEMP[5].w, IMM[0].xxxx 167: TXL TEMP[5], TEMP[5], SAMP[0], 2D 168: MOV TEMP[27], TEMP[5] 169: MOV TEMP[9].zw, TEMP[5] 170: MAD TEMP[8].x, TEMP[5].yyyy, IMM[0].wwww, TEMP[5].xxxx 171: MAD TEMP[3].xy, CONST[14].zwww, TEMP[0].zyyy, IN[0].xyyy 172: MOV TEMP[3].xy, TEMP[3].xyyy 173: MOV TEMP[3].w, IMM[0].xxxx 174: TXL TEMP[3], TEMP[3], SAMP[0], 2D 175: MOV TEMP[28], TEMP[3] 176: MOV TEMP[3], TEMP[3] 177: MAD TEMP[10].x, TEMP[3].yyyy, IMM[0].wwww, TEMP[3].xxxx 178: MOV TEMP[8].y, TEMP[10].xxxx 179: MAD TEMP[12].xy, CONST[14].zwww, TEMP[0].yzzz, IN[0].xyyy 180: MOV TEMP[12].xy, TEMP[12].xyyy 181: MOV TEMP[12].w, IMM[0].xxxx 182: TXL TEMP[12], TEMP[12], SAMP[0], 2D 183: MOV TEMP[29], TEMP[12] 184: MOV TEMP[12], TEMP[12] 185: ADD TEMP[14], TEMP[5], TEMP[3] 186: ADD TEMP[14], TEMP[12], TEMP[14] 187: ADD TEMP[16].xy, CONST[14].zwww, IN[0].xyyy 188: MOV TEMP[16].xy, TEMP[16].xyyy 189: MOV TEMP[16].w, IMM[0].xxxx 190: TXL TEMP[16], TEMP[16], SAMP[0], 2D 191: MOV TEMP[30], TEMP[16] 192: MOV TEMP[16].w, TEMP[16].wwww 193: ADD TEMP[14], TEMP[14], TEMP[16] 194: ADD TEMP[7], TEMP[7], TEMP[14] 195: MAD TEMP[14].x, TEMP[12].yyyy, IMM[0].wwww, TEMP[12].xxxx 196: MOV TEMP[8].z, TEMP[14].xxxx 197: MAD TEMP[20].x, TEMP[16].yyyy, IMM[0].wwww, TEMP[16].xxxx 198: MOV TEMP[8].w, TEMP[20].xxxx 199: MOV TEMP[16].x, TEMP[5].wwww 200: MOV TEMP[16].y, TEMP[3].wwww 201: MOV TEMP[16].z, TEMP[12].wwww 202: FSGE TEMP[11].x, TEMP[25].xxxx, IMM[0].xxxx 203: UIF TEMP[11].xxxx :0 204: MOV TEMP[8].x, TEMP[8].xxxx 205: ELSE :0 206: MOV TEMP[8].x, TEMP[5].wwww 207: ENDIF 208: MOV TEMP[17].x, TEMP[8].xxxx 209: FSGE TEMP[5].x, TEMP[25].xxxx, IMM[0].xxxx 210: UIF TEMP[5].xxxx :0 211: MOV TEMP[5].x, TEMP[10].xxxx 212: ELSE :0 213: MOV TEMP[5].x, TEMP[3].wwww 214: ENDIF 215: MOV TEMP[17].y, TEMP[5].xxxx 216: FSGE TEMP[3].x, TEMP[25].xxxx, IMM[0].xxxx 217: UIF TEMP[3].xxxx :0 218: MOV TEMP[3].x, TEMP[14].xxxx 219: ELSE :0 220: MOV TEMP[3].x, TEMP[12].wwww 221: ENDIF 222: MOV TEMP[17].z, TEMP[3].xxxx 223: FSGE TEMP[10].x, TEMP[25].xxxx, IMM[0].xxxx 224: UIF TEMP[10].xxxx :0 225: MOV TEMP[10].x, TEMP[20].xxxx 226: ELSE :0 227: MOV TEMP[10].x, TEMP[16].wwww 228: ENDIF 229: MOV TEMP[17].w, TEMP[10].xxxx 230: MOV TEMP[9].zw, TEMP[17] 231: MUL TEMP[12].x, TEMP[0].wwww, IMM[2].zzzz 232: MAD TEMP[12].x, TEMP[8].xxxx, IMM[1].zzzz, TEMP[12].xxxx 233: MAD TEMP[5].x, TEMP[5].xxxx, IMM[1].zzzz, TEMP[12].xxxx 234: MAD TEMP[12].x, TEMP[1].xxxx, IMM[2].wwww, -TEMP[1].yyyy 235: MOV TEMP[1].z, TEMP[12].xxxx 236: MAD TEMP[12].x, TEMP[15].xxxx, IMM[2].wwww, TEMP[12].xxxx 237: MOV TEMP[1].z, TEMP[12].xxxx 238: ABS TEMP[5].x, TEMP[5].xxxx 239: ABS TEMP[12].x, TEMP[12].xxxx 240: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[12].xxxx 241: MUL TEMP[12].xy, TEMP[15].yxxx, IMM[2].zzzz 242: MOV TEMP[1].zw, TEMP[12].yyxy 243: MAD TEMP[12].xy, TEMP[17].zyyy, IMM[1].zzzz, TEMP[12].xyyy 244: MOV TEMP[1].zw, TEMP[12].yyxy 245: MAD TEMP[10].xy, TEMP[10].xxxx, IMM[1].zzzz, TEMP[12].xyyy 246: MOV TEMP[1].zw, TEMP[10].yyxy 247: ABS TEMP[12].x, TEMP[10].xxxx 248: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[12].xxxx 249: MUL TEMP[12].x, TEMP[8].xxxx, IMM[1].zzzz 250: MOV TEMP[1].z, TEMP[12].xxxx 251: MAD TEMP[12].x, TEMP[1].xxxx, IMM[2].zzzz, TEMP[12].xxxx 252: MOV TEMP[1].z, TEMP[12].xxxx 253: MAD TEMP[3].x, TEMP[3].xxxx, IMM[1].zzzz, TEMP[12].xxxx 254: MOV TEMP[1].z, TEMP[3].xxxx 255: MAD TEMP[12].x, TEMP[0].wwww, IMM[2].wwww, -TEMP[1].yyyy 256: MOV TEMP[4].y, TEMP[12].xxxx 257: MAD TEMP[12].x, TEMP[15].yyyy, IMM[2].wwww, TEMP[12].xxxx 258: MOV TEMP[4].y, TEMP[12].xxxx 259: ABS TEMP[3].x, TEMP[3].xxxx 260: ABS TEMP[12].x, TEMP[12].xxxx 261: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[12].xxxx 262: MOV TEMP[1].z, TEMP[3].xxxx 263: ABS TEMP[10].x, TEMP[10].yyyy 264: ADD TEMP[3].x, TEMP[10].xxxx, TEMP[3].xxxx 265: MOV TEMP[1].z, TEMP[3].xxxx 266: ADD TEMP[5].x, -TEMP[5].xxxx, TEMP[3].xxxx 267: MOV TEMP[0].y, TEMP[5].xxxx 268: MOV TEMP[17].xyz, TEMP[0] 269: FSGE TEMP[5].x, TEMP[5].xxxx, IMM[0].xxxx 270: UIF TEMP[5].xxxx :0 271: MOV TEMP[5].x, TEMP[0].wwww 272: ELSE :0 273: MOV TEMP[5].x, TEMP[1].xxxx 274: ENDIF 275: MOV TEMP[17].w, TEMP[5].xxxx 276: MOV TEMP[0], TEMP[17] 277: MOV TEMP[17].y, TEMP[1] 278: FSGE TEMP[5].x, TEMP[0].yyyy, IMM[0].xxxx 279: UIF TEMP[5].xxxx :0 280: MOV TEMP[5].x, TEMP[15].yyyy 281: ELSE :0 282: MOV TEMP[5].x, TEMP[15].xxxx 283: ENDIF 284: MOV TEMP[17].x, TEMP[5].xxxx 285: MOV TEMP[1].y, TEMP[17].xyxx 286: MUL TEMP[7], TEMP[2].xxxx, TEMP[7] 287: ADD TEMP[3].x, -TEMP[17].yyyy, TEMP[0].wwww 288: ADD TEMP[10].x, -TEMP[17].yyyy, TEMP[5].xxxx 289: MOV TEMP[1].w, TEMP[10].xxxx 290: ABS TEMP[12].x, TEMP[3].xxxx 291: ABS TEMP[14].x, TEMP[10].xxxx 292: MAX TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx 293: MOV TEMP[4].y, TEMP[12].xxxx 294: ADD TEMP[12].x, TEMP[17].yyyy, TEMP[0].wwww 295: MUL TEMP[12].x, TEMP[12].xxxx, IMM[2].wwww 296: MOV TEMP[0].w, TEMP[12].xxxx 297: ADD TEMP[1].x, TEMP[17].yyyy, TEMP[5].xxxx 298: MUL TEMP[1].x, TEMP[1].xxxx, IMM[2].wwww 299: ABS TEMP[5].x, TEMP[3].xxxx 300: ABS TEMP[3].x, TEMP[10].xxxx 301: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[3].xxxx 302: MOV TEMP[1].z, TEMP[5].xxxx 303: MOV TEMP[17].xyz, TEMP[0] 304: FSGE TEMP[5].x, TEMP[5].xxxx, IMM[0].xxxx 305: UIF TEMP[5].xxxx :0 306: MOV TEMP[5].x, TEMP[12].xxxx 307: ELSE :0 308: MOV TEMP[5].x, TEMP[1].xxxx 309: ENDIF 310: MOV TEMP[17].w, TEMP[5].xxxx 311: MOV TEMP[0], TEMP[17] 312: MOV TEMP[17].yzw, TEMP[1] 313: FSGE TEMP[5].x, TEMP[0].yyyy, IMM[0].xxxx 314: UIF TEMP[5].xxxx :0 315: MOV TEMP[5].x, -CONST[14].wwww 316: ELSE :0 317: MOV TEMP[5].x, -CONST[14].zzzz 318: ENDIF 319: FSGE TEMP[3].x, TEMP[17].zzzz, IMM[0].xxxx 320: UIF TEMP[3].xxxx :0 321: MOV TEMP[3].x, TEMP[5].xxxx 322: ELSE :0 323: MOV TEMP[3].x, -TEMP[5].xxxx 324: ENDIF 325: MOV TEMP[17].x, TEMP[3].xxxx 326: MOV TEMP[1].xzw, TEMP[17].xxzw 327: ADD TEMP[5].x, TEMP[17].yyyy, -TEMP[0].wwww 328: MOV TEMP[1].y, TEMP[5].xxxx 329: MOV TEMP[17].xzw, TEMP[1] 330: FSGE TEMP[5].x, TEMP[5].xxxx, IMM[0].xxxx 331: UIF TEMP[5].xxxx :0 332: MOV TEMP[5].x, IMM[0].xxxx 333: ELSE :0 334: MOV TEMP[5].x, IMM[0].zzzz 335: ENDIF 336: MOV TEMP[17].y, TEMP[5].xxxx 337: MOV TEMP[1].xyw, TEMP[17] 338: MUL TEMP[5].xy, TEMP[0].zxxx, CONST[14].zzzz 339: MOV TEMP[0].xz, TEMP[5].xxyx 340: MOV TEMP[17].yw, TEMP[0] 341: FSGE TEMP[3].x, TEMP[0].yyyy, IMM[0].xxxx 342: UIF TEMP[3].xxxx :0 343: MOV TEMP[3].x, TEMP[5].xxxx 344: ELSE :0 345: MOV TEMP[3].x, TEMP[18].xxxx 346: ENDIF 347: MOV TEMP[17].x, TEMP[3].xxxx 348: FSGE TEMP[3].x, TEMP[0].yyyy, IMM[0].xxxx 349: UIF TEMP[3].xxxx :0 350: MOV TEMP[5].x, TEMP[5].yyyy 351: ELSE :0 352: MOV TEMP[5].x, TEMP[18].yyyy 353: ENDIF 354: MOV TEMP[17].z, TEMP[5].xxxx 355: MOV TEMP[0], TEMP[17] 356: MUL TEMP[5].x, TEMP[1].xxxx, IMM[2].wwww 357: MOV TEMP[1].z, TEMP[5].xxxx 358: MOV TEMP[17].xyz, TEMP[1] 359: FSGE TEMP[3].x, TEMP[0].yyyy, IMM[0].xxxx 360: UIF TEMP[3].xxxx :0 361: MOV TEMP[5].x, TEMP[5].xxxx 362: ELSE :0 363: MOV TEMP[5].x, IMM[0].xxxx 364: ENDIF 365: MOV TEMP[17].w, TEMP[5].xxxx 366: ADD TEMP[5].x, TEMP[5].xxxx, IN[0].yyyy 367: MOV TEMP[4].w, TEMP[5].xxxx 368: FSGE TEMP[5].x, TEMP[0].yyyy, IMM[0].xxxx 369: UIF TEMP[5].xxxx :0 370: MOV TEMP[5].x, IMM[0].xxxx 371: ELSE :0 372: MOV TEMP[5].x, TEMP[17].zzzz 373: ENDIF 374: MOV TEMP[17].z, TEMP[5].xxxx 375: MOV TEMP[1].xy, TEMP[17].xyxx 376: ADD TEMP[5].x, TEMP[5].xxxx, IN[0].xxxx 377: MOV TEMP[4].z, TEMP[5].xxxx 378: ADD TEMP[5].xy, TEMP[0].xzzz, TEMP[4].zwww 379: MOV TEMP[1].zw, TEMP[5].yyxy 380: ADD TEMP[3].xy, -TEMP[0].xzzz, TEMP[4].zwww 381: MOV TEMP[4].zw, TEMP[3].yyxy 382: MOV TEMP[9].xy, TEMP[3].xyxx 383: MOV TEMP[9].zw, TEMP[5].yyxy 384: MOV TEMP[13].xy, TEMP[0].wwww 385: MOV TEMP[13].zw, IMM[0].xxxx 386: MOV TEMP[5].x, IMM[3].xxxx 387: BGNLOOP :0 388: ISGE TEMP[3].x, TEMP[5].xxxx, IMM[3].yyyy 389: UIF TEMP[3].xxxx :0 390: BRK 391: ENDIF 392: FSLT TEMP[10].x, TEMP[6].wwww, TEMP[22].wwww 393: UIF TEMP[10].xxxx :0 394: MOV TEMP[17].yzw, TEMP[6] 395: FSGE TEMP[12].x, -TEMP[13].zzzz, IMM[0].xxxx 396: UIF TEMP[12].xxxx :0 397: MOV TEMP[14].x, IMM[0].zzzz 398: ELSE :0 399: MOV TEMP[14].x, IMM[0].xxxx 400: ENDIF 401: FSNE TEMP[16].x, TEMP[14].xxxx, -TEMP[14].xxxx 402: UIF TEMP[16].xxxx :0 403: MUL TEMP[15].xyw, TEMP[9].xyxx, IMM[0].zzxx 404: MOV TEMP[8].xy, TEMP[15].xyyy 405: MOV TEMP[8].w, TEMP[15].wwww 406: TXL TEMP[18], TEMP[8], SAMP[0], 2D 407: MOV TEMP[31], TEMP[18] 408: MOV TEMP[15], TEMP[18] 409: MOV TEMP[13].x, TEMP[18].wwww 410: ENDIF 411: FSGE TEMP[20].x, -TEMP[13].wwww, IMM[0].xxxx 412: UIF TEMP[20].xxxx :0 413: MOV TEMP[11].x, IMM[0].zzzz 414: ELSE :0 415: MOV TEMP[11].x, IMM[0].xxxx 416: ENDIF 417: MOV TEMP[17].x, TEMP[11].xxxx 418: MOV TEMP[6], TEMP[17] 419: FSNE TEMP[23].x, TEMP[11].xxxx, -TEMP[11].xxxx 420: UIF TEMP[23].xxxx :0 421: MUL TEMP[15], TEMP[9].zwzz, IMM[0].zzxx 422: MOV TEMP[24].xy, TEMP[15].xyyy 423: MOV TEMP[24].w, TEMP[15].wwww 424: TXL TEMP[25], TEMP[24], SAMP[0], 2D 425: MOV TEMP[32], TEMP[25] 426: MOV TEMP[15], TEMP[25] 427: MOV TEMP[13].y, TEMP[25].wwww 428: ENDIF 429: ELSE :0 430: MOV TEMP[17].yzw, TEMP[6] 431: FSGE TEMP[26].x, -TEMP[13].zzzz, IMM[0].xxxx 432: UIF TEMP[26].xxxx :0 433: MOV TEMP[33].x, IMM[0].zzzz 434: ELSE :0 435: MOV TEMP[33].x, IMM[0].xxxx 436: ENDIF 437: FSNE TEMP[34].x, TEMP[33].xxxx, -TEMP[33].xxxx 438: UIF TEMP[34].xxxx :0 439: MUL TEMP[15], TEMP[9].xyxx, IMM[0].zzxx 440: MOV TEMP[35].xy, TEMP[15].xyyy 441: MOV TEMP[35].w, TEMP[15].wwww 442: TXL TEMP[36], TEMP[35], SAMP[0], 2D 443: MOV TEMP[37], TEMP[36] 444: MOV TEMP[15], TEMP[36] 445: MAD TEMP[13].x, TEMP[36].yyyy, IMM[0].wwww, TEMP[36].xxxx 446: ENDIF 447: FSGE TEMP[38].x, -TEMP[13].wwww, IMM[0].xxxx 448: UIF TEMP[38].xxxx :0 449: MOV TEMP[39].x, IMM[0].zzzz 450: ELSE :0 451: MOV TEMP[39].x, IMM[0].xxxx 452: ENDIF 453: MOV TEMP[17].x, TEMP[39].xxxx 454: MOV TEMP[6].zw, TEMP[17] 455: FSNE TEMP[40].x, TEMP[39].xxxx, -TEMP[39].xxxx 456: UIF TEMP[40].xxxx :0 457: MUL TEMP[15], TEMP[9].zwzz, IMM[0].zzxx 458: MOV TEMP[41].xy, TEMP[15].xyyy 459: MOV TEMP[41].w, TEMP[15].wwww 460: TXL TEMP[42], TEMP[41], SAMP[0], 2D 461: MOV TEMP[43], TEMP[42] 462: MOV TEMP[15], TEMP[42] 463: MAD TEMP[44].x, TEMP[42].yyyy, IMM[0].wwww, TEMP[42].xxxx 464: MOV TEMP[13].y, TEMP[44].xxxx 465: ENDIF 466: ENDIF 467: ADD TEMP[6].x, -TEMP[0].wwww, TEMP[13].yyyy 468: ABS TEMP[45].x, TEMP[6].xxxx 469: MAD TEMP[6].x, TEMP[4].yyyy, IMM[1].wwww, TEMP[45].xxxx 470: FSGE TEMP[46].x, TEMP[6].xxxx, IMM[0].xxxx 471: UIF TEMP[46].xxxx :0 472: MOV TEMP[47].x, IMM[0].zzzz 473: ELSE :0 474: MOV TEMP[47].x, IMM[0].xxxx 475: ENDIF 476: MOV TEMP[6].zw, TEMP[6].wwzw 477: ADD TEMP[6].x, TEMP[13].wwww, TEMP[47].xxxx 478: ADD TEMP[48].x, -TEMP[0].wwww, TEMP[13].xxxx 479: ABS TEMP[49].x, TEMP[48].xxxx 480: MAD TEMP[50].x, TEMP[4].yyyy, IMM[1].wwww, TEMP[49].xxxx 481: FSGE TEMP[51].x, TEMP[50].xxxx, IMM[0].xxxx 482: UIF TEMP[51].xxxx :0 483: MOV TEMP[52].x, IMM[0].zzzz 484: ELSE :0 485: MOV TEMP[52].x, IMM[0].xxxx 486: ENDIF 487: MOV TEMP[6].xw, TEMP[6].xxzw 488: ADD TEMP[53].x, TEMP[13].zzzz, TEMP[52].xxxx 489: MOV TEMP[6].y, TEMP[53].xxxx 490: MOV TEMP[17].xy, TEMP[13] 491: FSGE TEMP[54].x, -TEMP[53].xxxx, IMM[0].xxxx 492: UIF TEMP[54].xxxx :0 493: MOV TEMP[55].x, IMM[0].xxxx 494: ELSE :0 495: MOV TEMP[55].x, IMM[0].zzzz 496: ENDIF 497: MOV TEMP[17].z, TEMP[55].xxxx 498: FSGE TEMP[56].x, -TEMP[6].xxxx, IMM[0].xxxx 499: UIF TEMP[56].xxxx :0 500: MOV TEMP[57].x, IMM[0].xxxx 501: ELSE :0 502: MOV TEMP[57].x, IMM[0].zzzz 503: ENDIF 504: MOV TEMP[17].w, TEMP[57].xxxx 505: MOV TEMP[13], TEMP[17] 506: MUL TEMP[58].x, TEMP[57].xxxx, TEMP[55].xxxx 507: MOV TEMP[6].z, TEMP[58].xxxx 508: FSNE TEMP[59].x, TEMP[58].xxxx, -TEMP[58].xxxx 509: UIF TEMP[59].xxxx :0 510: BRK 511: ENDIF 512: ADD TEMP[15].xy, TEMP[0].xzzz, TEMP[9].zwww 513: MOV TEMP[17].xy, TEMP[9] 514: FSGE TEMP[21].x, -TEMP[6].xxxx, IMM[0].xxxx 515: UIF TEMP[21].xxxx :0 516: MOV TEMP[60].x, TEMP[15].xxxx 517: ELSE :0 518: MOV TEMP[60].x, TEMP[9].zzzz 519: ENDIF 520: MOV TEMP[17].z, TEMP[60].xxxx 521: FSGE TEMP[61].x, -TEMP[6].xxxx, IMM[0].xxxx 522: UIF TEMP[61].xxxx :0 523: MOV TEMP[62].x, TEMP[15].yyyy 524: ELSE :0 525: MOV TEMP[62].x, TEMP[9].wwww 526: ENDIF 527: MOV TEMP[17].w, TEMP[62].xxxx 528: ADD TEMP[63].xy, -TEMP[0].xzzz, TEMP[17].xyyy 529: MOV TEMP[6].xz, TEMP[63].xxyx 530: FSGE TEMP[64].x, -TEMP[53].xxxx, IMM[0].xxxx 531: UIF TEMP[64].xxxx :0 532: MOV TEMP[65].x, TEMP[63].xxxx 533: ELSE :0 534: MOV TEMP[65].x, TEMP[17].xxxx 535: ENDIF 536: MOV TEMP[17].x, TEMP[65].xxxx 537: FSGE TEMP[66].x, -TEMP[53].xxxx, IMM[0].xxxx 538: UIF TEMP[66].xxxx :0 539: MOV TEMP[67].x, TEMP[63].yyyy 540: ELSE :0 541: MOV TEMP[67].x, TEMP[17].yyyy 542: ENDIF 543: MOV TEMP[17].y, TEMP[67].xxxx 544: MOV TEMP[9], TEMP[17] 545: UADD TEMP[5].x, TEMP[5].xxxx, IMM[3].zzzz 546: ENDLOOP :0 547: ADD TEMP[4].xy, -TEMP[9].xyyy, IN[0].xyyy 548: MOV TEMP[0].xz, TEMP[4].xxyx 549: MOV TEMP[17].yw, TEMP[0] 550: FSGE TEMP[5].x, TEMP[0].yyyy, IMM[0].xxxx 551: UIF TEMP[5].xxxx :0 552: MOV TEMP[5].x, TEMP[4].xxxx 553: ELSE :0 554: MOV TEMP[5].x, TEMP[4].yyyy 555: ENDIF 556: MOV TEMP[17].x, TEMP[5].xxxx 557: MOV TEMP[0].xyw, TEMP[17].xyxw 558: ADD TEMP[4].x, TEMP[9].zzzz, -IN[0].xxxx 559: MOV TEMP[0].z, TEMP[4].xxxx 560: ADD TEMP[5].x, TEMP[9].wwww, -IN[0].yyyy 561: MOV TEMP[17].xyw, TEMP[0] 562: FSGE TEMP[3].x, TEMP[0].yyyy, IMM[0].xxxx 563: UIF TEMP[3].xxxx :0 564: MOV TEMP[4].x, TEMP[4].xxxx 565: ELSE :0 566: MOV TEMP[4].x, TEMP[5].xxxx 567: ENDIF 568: MOV TEMP[17].z, TEMP[4].xxxx 569: MOV TEMP[0], TEMP[17] 570: ADD TEMP[4].x, TEMP[17].xxxx, -TEMP[4].xxxx 571: FSGE TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx 572: UIF TEMP[4].xxxx :0 573: MOV TEMP[4].x, TEMP[13].yyyy 574: ELSE :0 575: MOV TEMP[4].x, TEMP[13].xxxx 576: ENDIF 577: MOV TEMP[1].xy, TEMP[1].xyxw 578: ADD TEMP[4].x, -TEMP[0].wwww, TEMP[4].xxxx 579: MIN TEMP[5].x, TEMP[0].zzzz, TEMP[0].xxxx 580: FSGE TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx 581: UIF TEMP[4].xxxx :0 582: MOV TEMP[4].x, IMM[0].xxxx 583: ELSE :0 584: MOV TEMP[4].x, IMM[0].zzzz 585: ENDIF 586: MOV TEMP[0].xyz, TEMP[0].xyzx 587: ADD TEMP[4].x, TEMP[1].yyyy, -TEMP[4].xxxx 588: MOV TEMP[0].w, TEMP[4].xxxx 589: MOV TEMP[17].yz, TEMP[0] 590: ABS TEMP[4].x, TEMP[4].xxxx 591: FSGE TEMP[4].x, -TEMP[4].xxxx, IMM[0].xxxx 592: UIF TEMP[4].xxxx :0 593: MOV TEMP[4].x, IMM[0].xxxx 594: ELSE :0 595: MOV TEMP[4].x, TEMP[1].xxxx 596: ENDIF 597: MOV TEMP[17].w, TEMP[4].xxxx 598: MOV TEMP[0].yzw, TEMP[17].zyzw 599: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[17].zzzz 600: RCP TEMP[0].x, TEMP[0].xxxx 601: MAD TEMP[0].x, TEMP[5].xxxx, -TEMP[0].xxxx, IMM[2].wwww 602: MUL TEMP[0].x, TEMP[4].xxxx, TEMP[0].xxxx 603: MOV TEMP[17].xy, TEMP[0] 604: FSGE TEMP[4].x, TEMP[0].yyyy, IMM[0].xxxx 605: UIF TEMP[4].xxxx :0 606: MOV TEMP[4].x, TEMP[0].xxxx 607: ELSE :0 608: MOV TEMP[4].x, IMM[0].xxxx 609: ENDIF 610: ADD TEMP[4].x, TEMP[4].xxxx, IN[0].yyyy 611: MOV TEMP[1].y, TEMP[4].xxxx 612: FSGE TEMP[4].x, TEMP[17].yyyy, IMM[0].xxxx 613: UIF TEMP[4].xxxx :0 614: MOV TEMP[4].x, IMM[0].xxxx 615: ELSE :0 616: MOV TEMP[4].x, TEMP[17].xxxx 617: ENDIF 618: ADD TEMP[1].x, TEMP[4].xxxx, IN[0].xxxx 619: MOV TEMP[4].xy, TEMP[1].xyyy 620: MOV TEMP[4].w, IMM[0].xxxx 621: TXL TEMP[4], TEMP[4], SAMP[0], 2D 622: MAD TEMP[1], TEMP[7], IMM[4].xxxx, TEMP[4] 623: MAD TEMP[19], -TEMP[2].xxxx, TEMP[4], TEMP[1] 624: ENDIF 625: MAX TEMP[0].x, TEMP[19].xxxx, TEMP[19].yyyy 626: MAX TEMP[1].x, TEMP[0].xxxx, TEMP[19].zzzz 627: MUL TEMP[0].x, TEMP[1].xxxx, IMM[4].yyyy 628: MAX TEMP[1].x, TEMP[19].wwww, TEMP[0].xxxx 629: ADD TEMP[0].x, IMM[2].zzzz, IN[0].xxxx 630: ABS TEMP[2].x, TEMP[0].xxxx 631: ADD TEMP[0].x, -TEMP[2].xxxx, IMM[2].wwww 632: MUL TEMP[0].x, TEMP[0].xxxx, IMM[4].zzzz 633: MOV_SAT TEMP[0].x, TEMP[0].xxxx 634: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[0].xxxx 635: MOV TEMP[19].w, TEMP[0].xxxx 636: MOV OUT[0], TEMP[19] 637: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %24, 0.000000e+00 %33 = fadd float %32, %30 %34 = fmul float %25, -1.000000e+00 %35 = fadd float %34, %31 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = bitcast float 0.000000e+00 to i32 %39 = insertelement <4 x i32> undef, i32 %36, i32 0 %40 = insertelement <4 x i32> %39, i32 %37, i32 1 %41 = insertelement <4 x i32> %40, i32 %38, i32 2 %42 = insertelement <4 x i32> %41, i32 undef, i32 3 %43 = bitcast <8 x i32> %27 to <32 x i8> %44 = bitcast <4 x i32> %29 to <16 x i8> %45 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %42, <32 x i8> %43, <16 x i8> %44, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fmul float %24, -1.000000e+00 %51 = fadd float %50, %30 %52 = fmul float %25, 0.000000e+00 %53 = fadd float %52, %31 %54 = bitcast float %51 to i32 %55 = bitcast float %53 to i32 %56 = bitcast float 0.000000e+00 to i32 %57 = insertelement <4 x i32> undef, i32 %54, i32 0 %58 = insertelement <4 x i32> %57, i32 %55, i32 1 %59 = insertelement <4 x i32> %58, i32 %56, i32 2 %60 = insertelement <4 x i32> %59, i32 undef, i32 3 %61 = bitcast <8 x i32> %27 to <32 x i8> %62 = bitcast <4 x i32> %29 to <16 x i8> %63 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %60, <32 x i8> %61, <16 x i8> %62, i32 2) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = extractelement <4 x float> %63, i32 2 %67 = extractelement <4 x float> %63, i32 3 %68 = fmul float %24, 1.000000e+00 %69 = fadd float %68, %30 %70 = fmul float %25, 0.000000e+00 %71 = fadd float %70, %31 %72 = bitcast float %69 to i32 %73 = bitcast float %71 to i32 %74 = bitcast float 0.000000e+00 to i32 %75 = insertelement <4 x i32> undef, i32 %72, i32 0 %76 = insertelement <4 x i32> %75, i32 %73, i32 1 %77 = insertelement <4 x i32> %76, i32 %74, i32 2 %78 = insertelement <4 x i32> %77, i32 undef, i32 3 %79 = bitcast <8 x i32> %27 to <32 x i8> %80 = bitcast <4 x i32> %29 to <16 x i8> %81 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %78, <32 x i8> %79, <16 x i8> %80, i32 2) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = extractelement <4 x float> %81, i32 2 %85 = extractelement <4 x float> %81, i32 3 %86 = fmul float %24, 0.000000e+00 %87 = fadd float %86, %30 %88 = fmul float %25, 1.000000e+00 %89 = fadd float %88, %31 %90 = bitcast float %87 to i32 %91 = bitcast float %89 to i32 %92 = bitcast float 0.000000e+00 to i32 %93 = insertelement <4 x i32> undef, i32 %90, i32 0 %94 = insertelement <4 x i32> %93, i32 %91, i32 1 %95 = insertelement <4 x i32> %94, i32 %92, i32 2 %96 = insertelement <4 x i32> %95, i32 undef, i32 3 %97 = bitcast <8 x i32> %27 to <32 x i8> %98 = bitcast <4 x i32> %29 to <16 x i8> %99 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = fmul float %47, 0x3FFF694FA0000000 %105 = fadd float %104, %46 %106 = fmul float %65, 0x3FFF694FA0000000 %107 = fadd float %106, %64 %108 = fmul float %83, 0x3FFF694FA0000000 %109 = fadd float %108, %82 %110 = fmul float %101, 0x3FFF694FA0000000 %111 = fadd float %110, %100 %112 = call float @llvm.minnum.f32(float %107, float %105) %113 = fsub float -0.000000e+00, %109 %114 = fadd float %113, %111 %115 = fcmp oge float %114, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %. = select i1 %119, float %109, float %111 %120 = fcmp oge float %114, 0.000000e+00 %121 = sext i1 %120 to i32 %122 = bitcast i32 %121 to float %123 = bitcast float %122 to i32 %124 = icmp ne i32 %123, 0 %temp64.0 = select i1 %124, float %111, float %109 %125 = call float @llvm.minnum.f32(float %., float %112) %126 = call float @llvm.maxnum.f32(float %105, float %107) %127 = call float @llvm.maxnum.f32(float %126, float %temp64.0) %128 = call float @llvm.minnum.f32(float %67, float %49) %129 = call float @llvm.minnum.f32(float %85, float %103) %130 = call float @llvm.minnum.f32(float %129, float %128) %131 = call float @llvm.maxnum.f32(float %49, float %67) %132 = call float @llvm.maxnum.f32(float %103, float %85) %133 = call float @llvm.maxnum.f32(float %131, float %132) %134 = fmul float 1.000000e+00, %30 %135 = fmul float 1.000000e+00, %31 %136 = fmul float 0.000000e+00, %30 %137 = bitcast float %134 to i32 %138 = bitcast float %135 to i32 %139 = bitcast float %136 to i32 %140 = insertelement <4 x i32> undef, i32 %137, i32 0 %141 = insertelement <4 x i32> %140, i32 %138, i32 1 %142 = insertelement <4 x i32> %141, i32 %139, i32 2 %143 = insertelement <4 x i32> %142, i32 undef, i32 3 %144 = bitcast <8 x i32> %27 to <32 x i8> %145 = bitcast <4 x i32> %29 to <16 x i8> %146 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %143, <32 x i8> %144, <16 x i8> %145, i32 2) %147 = extractelement <4 x float> %146, i32 0 %148 = extractelement <4 x float> %146, i32 1 %149 = extractelement <4 x float> %146, i32 2 %150 = extractelement <4 x float> %146, i32 3 %151 = call float @llvm.maxnum.f32(float %150, float %133) %152 = call float @llvm.minnum.f32(float %130, float %150) %153 = fsub float -0.000000e+00, %152 %154 = fadd float %153, %151 %155 = fmul float %148, 0x3FFF694FA0000000 %156 = fadd float %155, %147 %157 = call float @llvm.minnum.f32(float %125, float %156) %158 = call float @llvm.maxnum.f32(float %156, float %127) %159 = fsub float -0.000000e+00, %157 %160 = fadd float %159, %158 %161 = fsub float -0.000000e+00, %154 %162 = fadd float %161, %160 %163 = fcmp oge float %162, 0.000000e+00 %164 = sext i1 %163 to i32 %165 = bitcast i32 %164 to float %166 = bitcast float %165 to i32 %167 = icmp ne i32 %166, 0 %.429 = select i1 %167, float %109, float %85 %168 = fcmp oge float %162, 0.000000e+00 %169 = sext i1 %168 to i32 %170 = bitcast i32 %169 to float %171 = bitcast float %170 to i32 %172 = icmp ne i32 %171, 0 %temp56.0 = select i1 %172, float %111, float %103 %173 = fcmp oge float %162, 0.000000e+00 %174 = sext i1 %173 to i32 %175 = bitcast i32 %174 to float %176 = bitcast float %175 to i32 %177 = icmp ne i32 %176, 0 %.430 = select i1 %177, float %158, float %151 %178 = fcmp oge float %162, 0.000000e+00 %179 = sext i1 %178 to i32 %180 = bitcast i32 %179 to float %181 = bitcast float %180 to i32 %182 = icmp ne i32 %181, 0 %temp64.1 = select i1 %182, float %160, float %154 %183 = fmul float %.430, 1.250000e-01 %184 = call float @llvm.maxnum.f32(float 0x3FA5555560000000, float %183) %185 = fmul float 0.000000e+00, %24 %186 = fmul float 1.000000e+00, %25 %187 = fcmp oge float %temp64.1, %184 %188 = sext i1 %187 to i32 %189 = bitcast i32 %188 to float %190 = bitcast float %189 to i32 %191 = icmp ne i32 %190, 0 br i1 %191, label %IF288, label %ENDIF287 IF288: ; preds = %main_body %192 = fadd float %46, %64 %193 = fadd float %47, %65 %194 = fadd float %48, %66 %195 = fadd float %49, %67 %196 = fadd float %147, %192 %197 = fadd float %148, %193 %198 = fadd float %149, %194 %199 = fadd float %150, %195 %200 = fadd float %82, %196 %201 = fadd float %83, %197 %202 = fadd float %84, %198 %203 = fadd float %85, %199 %204 = fadd float %100, %200 %205 = fadd float %101, %201 %206 = fadd float %102, %202 %207 = fadd float %103, %203 %208 = fcmp oge float %162, 0.000000e+00 %209 = sext i1 %208 to i32 %210 = bitcast i32 %209 to float %211 = bitcast float %210 to i32 %212 = icmp ne i32 %211, 0 %.431 = select i1 %212, float %105, float %49 %213 = fcmp oge float %162, 0.000000e+00 %214 = sext i1 %213 to i32 %215 = bitcast i32 %214 to float %216 = bitcast float %215 to i32 %217 = icmp ne i32 %216, 0 %temp8.0 = select i1 %217, float %107, float %67 %218 = fcmp oge float %162, 0.000000e+00 %219 = sext i1 %218 to i32 %220 = bitcast i32 %219 to float %221 = bitcast float %220 to i32 %222 = icmp ne i32 %221, 0 %.432 = select i1 %222, float %156, float %150 %223 = fdiv float 1.000000e+00, %temp64.1 %224 = fadd float %.431, %temp8.0 %225 = fadd float %.429, %224 %226 = fadd float %temp56.0, %225 %227 = fsub float -0.000000e+00, %.432 %228 = fmul float %226, 2.500000e-01 %229 = fadd float %228, %227 %230 = call float @fabs(float %229) %231 = fmul float %230, %223 %232 = fadd float %231, -2.500000e-01 %233 = call float @llvm.maxnum.f32(float %232, float 0.000000e+00) %234 = fmul float %233, 0x3FF5555560000000 %235 = call float @llvm.minnum.f32(float %234, float 7.500000e-01) %236 = fsub float -0.000000e+00, %24 %237 = fadd float %236, %30 %238 = fsub float -0.000000e+00, %25 %239 = fadd float %238, %31 %240 = bitcast float %237 to i32 %241 = bitcast float %239 to i32 %242 = bitcast float 0.000000e+00 to i32 %243 = insertelement <4 x i32> undef, i32 %240, i32 0 %244 = insertelement <4 x i32> %243, i32 %241, i32 1 %245 = insertelement <4 x i32> %244, i32 %242, i32 2 %246 = insertelement <4 x i32> %245, i32 undef, i32 3 %247 = bitcast <8 x i32> %27 to <32 x i8> %248 = bitcast <4 x i32> %29 to <16 x i8> %249 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %246, <32 x i8> %247, <16 x i8> %248, i32 2) %250 = extractelement <4 x float> %249, i32 0 %251 = extractelement <4 x float> %249, i32 1 %252 = extractelement <4 x float> %249, i32 2 %253 = extractelement <4 x float> %249, i32 3 %254 = fmul float %251, 0x3FFF694FA0000000 %255 = fadd float %254, %250 %256 = fmul float %24, 1.000000e+00 %257 = fadd float %256, %30 %258 = fmul float %25, -1.000000e+00 %259 = fadd float %258, %31 %260 = bitcast float %257 to i32 %261 = bitcast float %259 to i32 %262 = bitcast float 0.000000e+00 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = insertelement <4 x i32> %265, i32 undef, i32 3 %267 = bitcast <8 x i32> %27 to <32 x i8> %268 = bitcast <4 x i32> %29 to <16 x i8> %269 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %266, <32 x i8> %267, <16 x i8> %268, i32 2) %270 = extractelement <4 x float> %269, i32 0 %271 = extractelement <4 x float> %269, i32 1 %272 = extractelement <4 x float> %269, i32 2 %273 = extractelement <4 x float> %269, i32 3 %274 = fmul float %271, 0x3FFF694FA0000000 %275 = fadd float %274, %270 %276 = fmul float %24, -1.000000e+00 %277 = fadd float %276, %30 %278 = fmul float %25, 1.000000e+00 %279 = fadd float %278, %31 %280 = bitcast float %277 to i32 %281 = bitcast float %279 to i32 %282 = bitcast float 0.000000e+00 to i32 %283 = insertelement <4 x i32> undef, i32 %280, i32 0 %284 = insertelement <4 x i32> %283, i32 %281, i32 1 %285 = insertelement <4 x i32> %284, i32 %282, i32 2 %286 = insertelement <4 x i32> %285, i32 undef, i32 3 %287 = bitcast <8 x i32> %27 to <32 x i8> %288 = bitcast <4 x i32> %29 to <16 x i8> %289 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %286, <32 x i8> %287, <16 x i8> %288, i32 2) %290 = extractelement <4 x float> %289, i32 0 %291 = extractelement <4 x float> %289, i32 1 %292 = extractelement <4 x float> %289, i32 2 %293 = extractelement <4 x float> %289, i32 3 %294 = fadd float %250, %270 %295 = fadd float %251, %271 %296 = fadd float %252, %272 %297 = fadd float %253, %273 %298 = fadd float %290, %294 %299 = fadd float %291, %295 %300 = fadd float %292, %296 %301 = fadd float %293, %297 %302 = fadd float %24, %30 %303 = fadd float %25, %31 %304 = bitcast float %302 to i32 %305 = bitcast float %303 to i32 %306 = bitcast float 0.000000e+00 to i32 %307 = insertelement <4 x i32> undef, i32 %304, i32 0 %308 = insertelement <4 x i32> %307, i32 %305, i32 1 %309 = insertelement <4 x i32> %308, i32 %306, i32 2 %310 = insertelement <4 x i32> %309, i32 undef, i32 3 %311 = bitcast <8 x i32> %27 to <32 x i8> %312 = bitcast <4 x i32> %29 to <16 x i8> %313 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %310, <32 x i8> %311, <16 x i8> %312, i32 2) %314 = extractelement <4 x float> %313, i32 0 %315 = extractelement <4 x float> %313, i32 1 %316 = extractelement <4 x float> %313, i32 2 %317 = extractelement <4 x float> %313, i32 3 %318 = fadd float %298, %314 %319 = fadd float %299, %315 %320 = fadd float %300, %316 %321 = fadd float %301, %317 %322 = fadd float %204, %318 %323 = fadd float %205, %319 %324 = fadd float %206, %320 %325 = fadd float %207, %321 %326 = fmul float %291, 0x3FFF694FA0000000 %327 = fadd float %326, %290 %328 = fmul float %315, 0x3FFF694FA0000000 %329 = fadd float %328, %314 %330 = fcmp oge float %162, 0.000000e+00 %331 = sext i1 %330 to i32 %332 = bitcast i32 %331 to float %333 = bitcast float %332 to i32 %334 = icmp ne i32 %333, 0 %temp32.1 = select i1 %334, float %255, float %253 %335 = fcmp oge float %162, 0.000000e+00 %336 = sext i1 %335 to i32 %337 = bitcast i32 %336 to float %338 = bitcast float %337 to i32 %339 = icmp ne i32 %338, 0 %.433 = select i1 %339, float %275, float %273 %340 = fcmp oge float %162, 0.000000e+00 %341 = sext i1 %340 to i32 %342 = bitcast i32 %341 to float %343 = bitcast float %342 to i32 %344 = icmp ne i32 %343, 0 %temp12.1 = select i1 %344, float %327, float %293 %345 = fcmp oge float %162, 0.000000e+00 %346 = sext i1 %345 to i32 %347 = bitcast i32 %346 to float %348 = bitcast float %347 to i32 %349 = icmp ne i32 %348, 0 %.434 = select i1 %349, float %329, float %317 %350 = fmul float %.431, -5.000000e-01 %351 = fmul float %temp32.1, 2.500000e-01 %352 = fadd float %351, %350 %353 = fmul float %.433, 2.500000e-01 %354 = fadd float %353, %352 %355 = fsub float -0.000000e+00, %.432 %356 = fmul float %temp8.0, 5.000000e-01 %357 = fadd float %356, %355 %358 = fmul float %.429, 5.000000e-01 %359 = fadd float %358, %357 %360 = call float @fabs(float %354) %361 = call float @fabs(float %359) %362 = fadd float %360, %361 %363 = fmul float %temp56.0, -5.000000e-01 %364 = fmul float %.429, -5.000000e-01 %365 = fmul float %temp12.1, 2.500000e-01 %366 = fadd float %365, %363 %367 = fmul float %.433, 2.500000e-01 %368 = fadd float %367, %364 %369 = fmul float %.434, 2.500000e-01 %370 = fadd float %369, %366 %371 = fmul float %.434, 2.500000e-01 %372 = fadd float %371, %368 %373 = call float @fabs(float %370) %374 = fadd float %362, %373 %375 = fmul float %temp32.1, 2.500000e-01 %376 = fmul float %temp8.0, -5.000000e-01 %377 = fadd float %376, %375 %378 = fmul float %temp12.1, 2.500000e-01 %379 = fadd float %378, %377 %380 = fsub float -0.000000e+00, %.432 %381 = fmul float %.431, 5.000000e-01 %382 = fadd float %381, %380 %383 = fmul float %temp56.0, 5.000000e-01 %384 = fadd float %383, %382 %385 = call float @fabs(float %379) %386 = call float @fabs(float %384) %387 = fadd float %385, %386 %388 = call float @fabs(float %372) %389 = fadd float %388, %387 %390 = fsub float -0.000000e+00, %374 %391 = fadd float %390, %389 %392 = fcmp oge float %391, 0.000000e+00 %393 = sext i1 %392 to i32 %394 = bitcast i32 %393 to float %395 = bitcast float %394 to i32 %396 = icmp ne i32 %395, 0 %temp20.2 = select i1 %396, float %.431, float %temp8.0 %397 = fcmp oge float %391, 0.000000e+00 %398 = sext i1 %397 to i32 %399 = bitcast i32 %398 to float %400 = bitcast float %399 to i32 %401 = icmp ne i32 %400, 0 %temp56.0..429 = select i1 %401, float %temp56.0, float %.429 %402 = fmul float %235, %322 %403 = fmul float %235, %323 %404 = fmul float %235, %324 %405 = fmul float %235, %325 %406 = fsub float -0.000000e+00, %.432 %407 = fadd float %406, %temp20.2 %408 = fsub float -0.000000e+00, %.432 %409 = fadd float %408, %temp56.0..429 %410 = call float @fabs(float %407) %411 = call float @fabs(float %409) %412 = call float @llvm.maxnum.f32(float %410, float %411) %413 = fadd float %.432, %temp20.2 %414 = fmul float %413, 5.000000e-01 %415 = fadd float %.432, %temp56.0..429 %416 = fmul float %415, 5.000000e-01 %417 = call float @fabs(float %407) %418 = call float @fabs(float %409) %419 = fsub float -0.000000e+00, %418 %420 = fadd float %417, %419 %421 = fcmp oge float %420, 0.000000e+00 %422 = sext i1 %421 to i32 %423 = bitcast i32 %422 to float %424 = bitcast float %423 to i32 %425 = icmp ne i32 %424, 0 %temp20.4 = select i1 %425, float %414, float %416 %426 = fcmp oge float %391, 0.000000e+00 %427 = sext i1 %426 to i32 %428 = bitcast i32 %427 to float %429 = bitcast float %428 to i32 %430 = icmp ne i32 %429, 0 %.sink = select i1 %430, float %25, float %24 %431 = fsub float -0.000000e+00, %.sink %432 = fcmp oge float %420, 0.000000e+00 %433 = sext i1 %432 to i32 %434 = bitcast i32 %433 to float %435 = bitcast float %434 to i32 %436 = icmp ne i32 %435, 0 br i1 %436, label %ENDIF323, label %ELSE325 ENDIF287: ; preds = %main_body, %ENDLOOP %temp79.0 = phi float [ %609, %ENDLOOP ], [ %150, %main_body ] %temp78.0 = phi float [ %606, %ENDLOOP ], [ %149, %main_body ] %temp77.0 = phi float [ %603, %ENDLOOP ], [ %148, %main_body ] %temp76.0 = phi float [ %600, %ENDLOOP ], [ %147, %main_body ] %437 = call float @llvm.maxnum.f32(float %temp76.0, float %temp77.0) %438 = call float @llvm.maxnum.f32(float %437, float %temp78.0) %439 = fmul float %438, 4.000000e+00 %440 = call float @llvm.maxnum.f32(float %temp79.0, float %439) %441 = fadd float -5.000000e-01, %30 %442 = call float @fabs(float %441) %443 = fsub float -0.000000e+00, %442 %444 = fadd float %443, 5.000000e-01 %445 = fmul float %444, 2.000000e+01 %446 = call float @llvm.AMDIL.clamp.(float %445, float 0.000000e+00, float 1.000000e+00) %447 = fmul float %440, %446 %448 = call i32 @llvm.SI.packf16(float %temp76.0, float %temp77.0) %449 = bitcast i32 %448 to float %450 = call i32 @llvm.SI.packf16(float %temp78.0, float %447) %451 = bitcast i32 %450 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %449, float %451, float %449, float %451) ret void ELSE325: ; preds = %IF288 %452 = fsub float -0.000000e+00, %431 br label %ENDIF323 ENDIF323: ; preds = %IF288, %ELSE325 %temp12.2 = phi float [ %452, %ELSE325 ], [ %431, %IF288 ] %453 = fsub float -0.000000e+00, %temp20.4 %454 = fadd float %.432, %453 %455 = fcmp oge float %454, 0.000000e+00 %456 = sext i1 %455 to i32 %457 = bitcast i32 %456 to float %458 = bitcast float %457 to i32 %459 = icmp ne i32 %458, 0 %.435 = select i1 %459, float 0.000000e+00, float 1.000000e+00 %460 = fmul float 1.000000e+00, %24 %461 = fmul float 0.000000e+00, %24 %462 = fcmp oge float %391, 0.000000e+00 %463 = sext i1 %462 to i32 %464 = bitcast i32 %463 to float %465 = bitcast float %464 to i32 %466 = icmp ne i32 %465, 0 %temp12.3 = select i1 %466, float %460, float %185 %467 = fcmp oge float %391, 0.000000e+00 %468 = sext i1 %467 to i32 %469 = bitcast i32 %468 to float %470 = bitcast float %469 to i32 %471 = icmp ne i32 %470, 0 %.436 = select i1 %471, float %461, float %186 %472 = fmul float %temp12.2, 5.000000e-01 %473 = fcmp oge float %391, 0.000000e+00 %474 = sext i1 %473 to i32 %475 = bitcast i32 %474 to float %476 = bitcast float %475 to i32 %477 = icmp ne i32 %476, 0 %temp20.8 = select i1 %477, float %472, float 0.000000e+00 %478 = fadd float %temp20.8, %31 %479 = fcmp oge float %391, 0.000000e+00 %480 = sext i1 %479 to i32 %481 = bitcast i32 %480 to float %482 = bitcast float %481 to i32 %483 = icmp ne i32 %482, 0 %.437 = select i1 %483, float 0.000000e+00, float %472 %484 = fadd float %.437, %30 %485 = fadd float %temp12.3, %484 %486 = fadd float %.436, %478 %487 = fsub float -0.000000e+00, %temp12.3 %488 = fadd float %487, %484 %489 = fsub float -0.000000e+00, %.436 %490 = fadd float %489, %478 %491 = fcmp olt float %160, %154 %492 = sext i1 %491 to i32 %493 = bitcast i32 %492 to float %494 = bitcast float %493 to i32 %495 = icmp ne i32 %494, 0 %496 = bitcast <8 x i32> %27 to <32 x i8> %497 = bitcast <4 x i32> %29 to <16 x i8> %498 = bitcast <8 x i32> %27 to <32 x i8> %499 = bitcast <4 x i32> %29 to <16 x i8> %500 = fsub float -0.000000e+00, %temp20.4 %501 = fmul float %412, -2.500000e-01 %502 = fsub float -0.000000e+00, %temp20.4 %503 = fmul float %412, -2.500000e-01 %504 = fsub float -0.000000e+00, %temp12.3 %505 = fsub float -0.000000e+00, %.436 %506 = bitcast <8 x i32> %27 to <32 x i8> %507 = bitcast <4 x i32> %29 to <16 x i8> %508 = bitcast <8 x i32> %27 to <32 x i8> %509 = bitcast <4 x i32> %29 to <16 x i8> br label %LOOP LOOP: ; preds = %ENDIF383, %ENDIF323 %temp55.0 = phi float [ 0.000000e+00, %ENDIF323 ], [ %temp228.0, %ENDIF383 ] %temp54.0 = phi float [ 0.000000e+00, %ENDIF323 ], [ %.444, %ENDIF383 ] %temp53.0 = phi float [ %temp20.4, %ENDIF323 ], [ %temp53.2, %ENDIF383 ] %temp52.0 = phi float [ %temp20.4, %ENDIF323 ], [ %temp52.2, %ENDIF383 ] %temp39.0 = phi float [ %486, %ENDIF323 ], [ %temp248.0, %ENDIF383 ] %temp38.0 = phi float [ %485, %ENDIF323 ], [ %.temp38.0, %ENDIF383 ] %temp37.0 = phi float [ %490, %ENDIF323 ], [ %temp268.0, %ENDIF383 ] %temp36.0 = phi float [ %488, %ENDIF323 ], [ %.temp36.0, %ENDIF383 ] %temp20.10 = phi float [ 0.000000e+00, %ENDIF323 ], [ %767, %ENDIF383 ] %510 = bitcast float %temp20.10 to i32 %511 = icmp sge i32 %510, 16 %512 = sext i1 %511 to i32 %513 = bitcast i32 %512 to float %514 = bitcast float %513 to i32 %515 = icmp ne i32 %514, 0 br i1 %515, label %ENDLOOP, label %ENDIF341 ENDLOOP: ; preds = %ENDIF344, %LOOP %temp36.0428 = phi float [ %temp36.0, %LOOP ], [ %temp36.0, %ENDIF344 ] %temp37.0426 = phi float [ %temp37.0, %LOOP ], [ %temp37.0, %ENDIF344 ] %temp38.0424 = phi float [ %temp38.0, %LOOP ], [ %temp38.0, %ENDIF344 ] %temp39.0422 = phi float [ %temp39.0, %LOOP ], [ %temp39.0, %ENDIF344 ] %temp53.1 = phi float [ %temp53.0, %LOOP ], [ %temp53.2, %ENDIF344 ] %temp52.1 = phi float [ %temp52.0, %LOOP ], [ %temp52.2, %ENDIF344 ] %516 = fsub float -0.000000e+00, %temp36.0428 %517 = fadd float %516, %30 %518 = fsub float -0.000000e+00, %temp37.0426 %519 = fadd float %518, %31 %520 = fcmp oge float %391, 0.000000e+00 %521 = sext i1 %520 to i32 %522 = bitcast i32 %521 to float %523 = bitcast float %522 to i32 %524 = icmp ne i32 %523, 0 %.438 = select i1 %524, float %517, float %519 %525 = fsub float -0.000000e+00, %30 %526 = fadd float %temp38.0424, %525 %527 = fsub float -0.000000e+00, %31 %528 = fadd float %temp39.0422, %527 %529 = fcmp oge float %391, 0.000000e+00 %530 = sext i1 %529 to i32 %531 = bitcast i32 %530 to float %532 = bitcast float %531 to i32 %533 = icmp ne i32 %532, 0 %temp16.0 = select i1 %533, float %526, float %528 %534 = fsub float -0.000000e+00, %temp16.0 %535 = fadd float %.438, %534 %536 = fcmp oge float %535, 0.000000e+00 %537 = sext i1 %536 to i32 %538 = bitcast i32 %537 to float %539 = bitcast float %538 to i32 %540 = icmp ne i32 %539, 0 %temp53.1.temp52.1 = select i1 %540, float %temp53.1, float %temp52.1 %541 = fsub float -0.000000e+00, %temp20.4 %542 = fadd float %541, %temp53.1.temp52.1 %543 = call float @llvm.minnum.f32(float %temp16.0, float %.438) %544 = fcmp oge float %542, 0.000000e+00 %545 = sext i1 %544 to i32 %546 = bitcast i32 %545 to float %547 = bitcast float %546 to i32 %548 = icmp ne i32 %547, 0 %temp16.2 = select i1 %548, float 0.000000e+00, float 1.000000e+00 %549 = fsub float -0.000000e+00, %temp16.2 %550 = fadd float %.435, %549 %551 = call float @fabs(float %550) %552 = fsub float -0.000000e+00, %551 %553 = fcmp oge float %552, 0.000000e+00 %554 = sext i1 %553 to i32 %555 = bitcast i32 %554 to float %556 = bitcast float %555 to i32 %557 = icmp ne i32 %556, 0 %.temp12.2 = select i1 %557, float 0.000000e+00, float %temp12.2 %558 = fadd float %.438, %temp16.0 %559 = fdiv float 1.000000e+00, %558 %560 = fsub float -0.000000e+00, %559 %561 = fmul float %543, %560 %562 = fadd float %561, 5.000000e-01 %563 = fmul float %.temp12.2, %562 %564 = fcmp oge float %391, 0.000000e+00 %565 = sext i1 %564 to i32 %566 = bitcast i32 %565 to float %567 = bitcast float %566 to i32 %568 = icmp ne i32 %567, 0 %temp16.4 = select i1 %568, float %563, float 0.000000e+00 %569 = fadd float %temp16.4, %31 %570 = fcmp oge float %391, 0.000000e+00 %571 = sext i1 %570 to i32 %572 = bitcast i32 %571 to float %573 = bitcast float %572 to i32 %574 = icmp ne i32 %573, 0 %.445 = select i1 %574, float 0.000000e+00, float %563 %575 = fadd float %.445, %30 %576 = bitcast float %575 to i32 %577 = bitcast float %569 to i32 %578 = bitcast float 0.000000e+00 to i32 %579 = insertelement <4 x i32> undef, i32 %576, i32 0 %580 = insertelement <4 x i32> %579, i32 %577, i32 1 %581 = insertelement <4 x i32> %580, i32 %578, i32 2 %582 = insertelement <4 x i32> %581, i32 undef, i32 3 %583 = bitcast <8 x i32> %27 to <32 x i8> %584 = bitcast <4 x i32> %29 to <16 x i8> %585 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %582, <32 x i8> %583, <16 x i8> %584, i32 2) %586 = extractelement <4 x float> %585, i32 0 %587 = extractelement <4 x float> %585, i32 1 %588 = extractelement <4 x float> %585, i32 2 %589 = extractelement <4 x float> %585, i32 3 %590 = fmul float %402, 0x3FBC71C720000000 %591 = fadd float %590, %586 %592 = fmul float %403, 0x3FBC71C720000000 %593 = fadd float %592, %587 %594 = fmul float %404, 0x3FBC71C720000000 %595 = fadd float %594, %588 %596 = fmul float %405, 0x3FBC71C720000000 %597 = fadd float %596, %589 %598 = fsub float -0.000000e+00, %235 %599 = fmul float %598, %586 %600 = fadd float %599, %591 %601 = fsub float -0.000000e+00, %235 %602 = fmul float %601, %587 %603 = fadd float %602, %593 %604 = fsub float -0.000000e+00, %235 %605 = fmul float %604, %588 %606 = fadd float %605, %595 %607 = fsub float -0.000000e+00, %235 %608 = fmul float %607, %589 %609 = fadd float %608, %597 br label %ENDIF287 ENDIF341: ; preds = %LOOP %610 = fsub float -0.000000e+00, %temp54.0 %611 = fcmp oge float %610, 0.000000e+00 %612 = sext i1 %611 to i32 %613 = bitcast i32 %612 to float %614 = bitcast float %613 to i32 %615 = icmp ne i32 %614, 0 %.439 = select i1 %615, float 1.000000e+00, float 0.000000e+00 %616 = fsub float -0.000000e+00, %.439 %617 = fcmp une float %.439, %616 %618 = sext i1 %617 to i32 %619 = bitcast i32 %618 to float %620 = bitcast float %619 to i32 %621 = icmp ne i32 %620, 0 br i1 %495, label %IF345, label %ELSE346 IF345: ; preds = %ENDIF341 br i1 %621, label %IF351, label %ENDIF350 ELSE346: ; preds = %ENDIF341 br i1 %621, label %IF363, label %ENDIF362 ENDIF344: ; preds = %IF369, %ENDIF362, %IF357, %ENDIF350 %temp53.2 = phi float [ %694, %IF357 ], [ %temp53.0, %ENDIF350 ], [ %736, %IF369 ], [ %temp53.0, %ENDIF362 ] %temp52.2 = phi float [ %temp52.3, %ENDIF350 ], [ %temp52.3, %IF357 ], [ %temp52.4, %ENDIF362 ], [ %temp52.4, %IF369 ] %622 = fadd float %500, %temp53.2 %623 = call float @fabs(float %622) %624 = fadd float %501, %623 %625 = fcmp oge float %624, 0.000000e+00 %626 = sext i1 %625 to i32 %627 = bitcast i32 %626 to float %628 = bitcast float %627 to i32 %629 = icmp ne i32 %628, 0 %.441 = select i1 %629, float 1.000000e+00, float 0.000000e+00 %630 = fadd float %temp55.0, %.441 %631 = fadd float %502, %temp52.2 %632 = call float @fabs(float %631) %633 = fadd float %503, %632 %634 = fcmp oge float %633, 0.000000e+00 %635 = sext i1 %634 to i32 %636 = bitcast i32 %635 to float %637 = bitcast float %636 to i32 %638 = icmp ne i32 %637, 0 %temp208.0 = select i1 %638, float 1.000000e+00, float 0.000000e+00 %639 = fadd float %temp54.0, %temp208.0 %640 = fsub float -0.000000e+00, %639 %641 = fcmp oge float %640, 0.000000e+00 %642 = sext i1 %641 to i32 %643 = bitcast i32 %642 to float %644 = bitcast float %643 to i32 %645 = icmp ne i32 %644, 0 %.444 = select i1 %645, float 0.000000e+00, float 1.000000e+00 %646 = fsub float -0.000000e+00, %630 %647 = fcmp oge float %646, 0.000000e+00 %648 = sext i1 %647 to i32 %649 = bitcast i32 %648 to float %650 = bitcast float %649 to i32 %651 = icmp ne i32 %650, 0 %temp228.0 = select i1 %651, float 0.000000e+00, float 1.000000e+00 %652 = fmul float %temp228.0, %.444 %653 = fsub float -0.000000e+00, %652 %654 = fcmp une float %652, %653 %655 = sext i1 %654 to i32 %656 = bitcast i32 %655 to float %657 = bitcast float %656 to i32 %658 = icmp ne i32 %657, 0 br i1 %658, label %ENDLOOP, label %ENDIF383 IF351: ; preds = %IF345 %659 = fmul float %temp36.0, 1.000000e+00 %660 = fmul float %temp37.0, 1.000000e+00 %661 = fmul float %temp36.0, 0.000000e+00 %662 = bitcast float %659 to i32 %663 = bitcast float %660 to i32 %664 = bitcast float %661 to i32 %665 = insertelement <4 x i32> undef, i32 %662, i32 0 %666 = insertelement <4 x i32> %665, i32 %663, i32 1 %667 = insertelement <4 x i32> %666, i32 %664, i32 2 %668 = insertelement <4 x i32> %667, i32 undef, i32 3 %669 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %668, <32 x i8> %496, <16 x i8> %497, i32 2) %670 = extractelement <4 x float> %669, i32 3 br label %ENDIF350 ENDIF350: ; preds = %IF345, %IF351 %temp52.3 = phi float [ %670, %IF351 ], [ %temp52.0, %IF345 ] %671 = fsub float -0.000000e+00, %temp55.0 %672 = fcmp oge float %671, 0.000000e+00 %673 = sext i1 %672 to i32 %674 = bitcast i32 %673 to float %675 = bitcast float %674 to i32 %676 = icmp ne i32 %675, 0 %.442 = select i1 %676, float 1.000000e+00, float 0.000000e+00 %677 = fsub float -0.000000e+00, %.442 %678 = fcmp une float %.442, %677 %679 = sext i1 %678 to i32 %680 = bitcast i32 %679 to float %681 = bitcast float %680 to i32 %682 = icmp ne i32 %681, 0 br i1 %682, label %IF357, label %ENDIF344 IF357: ; preds = %ENDIF350 %683 = fmul float %temp38.0, 1.000000e+00 %684 = fmul float %temp39.0, 1.000000e+00 %685 = fmul float %temp38.0, 0.000000e+00 %686 = bitcast float %683 to i32 %687 = bitcast float %684 to i32 %688 = bitcast float %685 to i32 %689 = insertelement <4 x i32> undef, i32 %686, i32 0 %690 = insertelement <4 x i32> %689, i32 %687, i32 1 %691 = insertelement <4 x i32> %690, i32 %688, i32 2 %692 = insertelement <4 x i32> %691, i32 undef, i32 3 %693 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %692, <32 x i8> %498, <16 x i8> %499, i32 2) %694 = extractelement <4 x float> %693, i32 3 br label %ENDIF344 IF363: ; preds = %ELSE346 %695 = fmul float %temp36.0, 1.000000e+00 %696 = fmul float %temp37.0, 1.000000e+00 %697 = fmul float %temp36.0, 0.000000e+00 %698 = bitcast float %695 to i32 %699 = bitcast float %696 to i32 %700 = bitcast float %697 to i32 %701 = insertelement <4 x i32> undef, i32 %698, i32 0 %702 = insertelement <4 x i32> %701, i32 %699, i32 1 %703 = insertelement <4 x i32> %702, i32 %700, i32 2 %704 = insertelement <4 x i32> %703, i32 undef, i32 3 %705 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %704, <32 x i8> %506, <16 x i8> %507, i32 2) %706 = extractelement <4 x float> %705, i32 0 %707 = extractelement <4 x float> %705, i32 1 %708 = fmul float %707, 0x3FFF694FA0000000 %709 = fadd float %708, %706 br label %ENDIF362 ENDIF362: ; preds = %ELSE346, %IF363 %temp52.4 = phi float [ %709, %IF363 ], [ %temp52.0, %ELSE346 ] %710 = fsub float -0.000000e+00, %temp55.0 %711 = fcmp oge float %710, 0.000000e+00 %712 = sext i1 %711 to i32 %713 = bitcast i32 %712 to float %714 = bitcast float %713 to i32 %715 = icmp ne i32 %714, 0 %.443 = select i1 %715, float 1.000000e+00, float 0.000000e+00 %716 = fsub float -0.000000e+00, %.443 %717 = fcmp une float %.443, %716 %718 = sext i1 %717 to i32 %719 = bitcast i32 %718 to float %720 = bitcast float %719 to i32 %721 = icmp ne i32 %720, 0 br i1 %721, label %IF369, label %ENDIF344 IF369: ; preds = %ENDIF362 %722 = fmul float %temp38.0, 1.000000e+00 %723 = fmul float %temp39.0, 1.000000e+00 %724 = fmul float %temp38.0, 0.000000e+00 %725 = bitcast float %722 to i32 %726 = bitcast float %723 to i32 %727 = bitcast float %724 to i32 %728 = insertelement <4 x i32> undef, i32 %725, i32 0 %729 = insertelement <4 x i32> %728, i32 %726, i32 1 %730 = insertelement <4 x i32> %729, i32 %727, i32 2 %731 = insertelement <4 x i32> %730, i32 undef, i32 3 %732 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %731, <32 x i8> %508, <16 x i8> %509, i32 2) %733 = extractelement <4 x float> %732, i32 0 %734 = extractelement <4 x float> %732, i32 1 %735 = fmul float %734, 0x3FFF694FA0000000 %736 = fadd float %735, %733 br label %ENDIF344 ENDIF383: ; preds = %ENDIF344 %737 = fadd float %temp12.3, %temp38.0 %738 = fadd float %.436, %temp39.0 %739 = fsub float -0.000000e+00, %630 %740 = fcmp oge float %739, 0.000000e+00 %741 = sext i1 %740 to i32 %742 = bitcast i32 %741 to float %743 = bitcast float %742 to i32 %744 = icmp ne i32 %743, 0 %.temp38.0 = select i1 %744, float %737, float %temp38.0 %745 = fsub float -0.000000e+00, %630 %746 = fcmp oge float %745, 0.000000e+00 %747 = sext i1 %746 to i32 %748 = bitcast i32 %747 to float %749 = bitcast float %748 to i32 %750 = icmp ne i32 %749, 0 %temp248.0 = select i1 %750, float %738, float %temp39.0 %751 = fadd float %504, %temp36.0 %752 = fadd float %505, %temp37.0 %753 = fsub float -0.000000e+00, %639 %754 = fcmp oge float %753, 0.000000e+00 %755 = sext i1 %754 to i32 %756 = bitcast i32 %755 to float %757 = bitcast float %756 to i32 %758 = icmp ne i32 %757, 0 %.temp36.0 = select i1 %758, float %751, float %temp36.0 %759 = fsub float -0.000000e+00, %639 %760 = fcmp oge float %759, 0.000000e+00 %761 = sext i1 %760 to i32 %762 = bitcast i32 %761 to float %763 = bitcast float %762 to i32 %764 = icmp ne i32 %763, 0 %temp268.0 = select i1 %764, float %752, float %temp37.0 %765 = bitcast float %temp20.10 to i32 %766 = add i32 %765, 1 %767 = bitcast i32 %766 to float br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_mul_f32_e32 v4, 0, v2 ; 10080480 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[12:19], s[8:11] ; F0900F00 00431102 v_mov_b32_e32 v7, 0x3ffb4a7d ; 7E0E02FF 3FFB4A7D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v18, v7, v17 ; D2820000 04460F12 s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[4:7], 0x3b ; C200053B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, 0, s0, v3 ; D282000D 040C0080 s_buffer_load_dword s1, s[4:7], 0x3a ; C200853A s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v12, s1, v2 ; 06180401 v_mov_b32_e32 v14, 0 ; 7E1C0280 image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[12:19], s[8:11] ; F0900F00 0043150C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v22, v7, v21 ; D2820001 04560F16 v_add_f32_e32 v34, s0, v3 ; 06440600 v_mad_f32 v33, 0, s1, v2 ; D2820021 04080280 v_mov_b32_e32 v35, v14 ; 7E46030E image_sample_l v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[12:19], s[8:11] ; F0900F00 00431921 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v11, v26, v7, v25 ; D282000B 04660F1A v_subrev_f32_e32 v6, v1, v11 ; 0A0C1701 v_cmp_ge_f32_e64 s[2:3], v6, 0 ; D00C0002 00010106 v_cndmask_b32_e64 v6, 0, -1, s[2:3] ; D2000006 00098280 v_cmp_ne_i32_e64 s[2:3], v6, 0 ; D10A0002 00010106 v_cndmask_b32_e64 v8, v11, v1, s[2:3] ; D2000008 180A030B v_subrev_f32_e32 v12, s1, v2 ; 0A180401 image_sample_l v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[12:19], s[8:11] ; F0900F00 00431D0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v30, v7, v29 ; D2820006 04760F1E v_subrev_f32_e32 v34, s0, v3 ; 0A440600 v_mov_b32_e32 v35, v14 ; 7E46030E image_sample_l v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[12:19], s[8:11] ; F0900F00 00432221 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v35, v7, v34 ; D2820007 048A0F23 v_min_f32_e32 v9, v7, v6 ; 1E120D07 v_min3_f32 v8, v8, v9, v0 ; D2A20008 04021308 v_cndmask_b32_e64 v9, v1, v11, s[2:3] ; D2000809 080A1701 v_max_f32_e32 v10, v6, v7 ; 20140F06 v_max3_f32 v9, v0, v10, v9 ; D2A80009 04261500 v_subrev_f32_e32 v33, v8, v9 ; 0A421308 v_min_f32_e32 v8, v37, v32 ; 1E104125 v_min_f32_e32 v10, v28, v24 ; 1E14311C v_min3_f32 v8, v10, v8, v20 ; D2A20008 0452110A v_max_f32_e32 v10, v24, v28 ; 20143918 v_max_f32_e32 v12, v32, v37 ; 20184B20 v_max3_f32 v10, v20, v12, v10 ; D2A8000A 042A1914 v_subrev_f32_e32 v38, v8, v10 ; 0A4C1508 v_subrev_f32_e32 v12, v38, v33 ; 0A184326 v_cmp_ge_f32_e64 s[2:3], v12, 0 ; D00C0002 0001010C v_cndmask_b32_e64 v8, 0, -1, s[2:3] ; D2000008 00098280 v_cmp_ne_i32_e64 s[2:3], v8, 0 ; D10A0002 00010108 v_cndmask_b32_e64 v8, v38, v33, s[2:3] ; D2000008 100A4326 v_cndmask_b32_e64 v9, v10, v9, s[2:3] ; D2000809 100A130A v_mul_f32_e32 v9, 0x3e000000, v9 ; 101212FF 3E000000 v_max_f32_e32 v9, 0x3d2aaaab, v9 ; 201212FF 3D2AAAAB v_cmp_ge_f32_e32 vcc, v8, v9 ; 7C0C1308 v_cndmask_b32_e64 v9, 0, -1, vcc ; D2000809 01A98280 v_cmp_ne_i32_e64 s[2:3], v9, 0 ; D10A0002 00010109 s_and_saveexec_b64 s[2:3], s[2:3] ; BE822402 s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E s_cbranch_execz BB0_4 ; BF880000 v_mov_b32_e32 v10, s0 ; 7E140200 v_mov_b32_e32 v9, s1 ; 7E120201 v_add_f32_e32 v14, s0, v3 ; 061C0600 v_add_f32_e32 v13, s1, v2 ; 061A0401 v_mov_b32_e32 v15, 0 ; 7E1E0280 image_sample_l v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[12:19], s[8:11] ; F0900F00 0043290D v_mov_b32_e32 v39, 0x3ffb4a7d ; 7E4E02FF 3FFB4A7D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v40, v42, v39, v41 ; D2820028 04A64F2A v_cmp_ge_f32_e64 s[4:5], v12, 0 ; D00C0004 0001010C v_cndmask_b32_e64 v12, 0, -1, s[4:5] ; D200000C 00118280 v_cmp_ne_i32_e64 s[4:5], v12, 0 ; D10A0004 0001010C v_cndmask_b32_e64 v12, v44, v40, s[4:5] ; D200000C 0012512C v_subrev_f32_e32 v55, s1, v2 ; 0A6E0401 v_mov_b32_e32 v56, v14 ; 7E70030E v_mov_b32_e32 v57, v15 ; 7E72030F image_sample_l v[47:50], 15, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[12:19], s[8:11] ; F0900F00 00432F37 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v40, v48, v39, v47 ; D2820028 04BE4F30 v_cndmask_b32_e64 v40, v50, v40, s[4:5] ; D2000028 10125132 v_cndmask_b32_e64 v11, v28, v11, s[4:5] ; D200080B 0012171C v_mul_f32_e32 v45, -0.5, v11 ; 105A16F1 v_mov_b32_e32 v46, 0x3e800000 ; 7E5C02FF 3E800000 v_mad_f32 v45, v40, v46, v45 ; D282002D 04B65D28 v_mad_f32 v45, v12, v46, v45 ; D282002D 04B65D0C v_subrev_f32_e32 v14, s0, v3 ; 0A1C0600 image_sample_l v[51:54], 15, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[12:19], s[8:11] ; F0900F00 0043330D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v59, v52, v39, v51 ; D282003B 04CE4F34 v_cndmask_b32_e64 v59, v54, v59, s[4:5] ; D200083B 10127736 v_mov_b32_e32 v56, v14 ; 7E70030E v_mov_b32_e32 v57, v15 ; 7E72030F image_sample_l v[55:58], 15, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[12:19], s[8:11] ; F0900F00 00433737 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v13, v56, v39, v55 ; D282000D 04DE4F38 v_cndmask_b32_e64 v13, v58, v13, s[4:5] ; D200080D 10121B3A v_cndmask_b32_e64 v14, v37, v7, s[4:5] ; D200000E 08120F25 v_mul_f32_e32 v7, -0.5, v14 ; 100E1CF1 v_mad_f32 v7, v13, v46, v7 ; D2820007 041E5D0D v_mad_f32 v15, v59, v46, v7 ; D282000F 041E5D3B v_cndmask_b32_e64 v7, v20, v0, s[4:5] ; D2000807 00120114 v_cndmask_b32_e64 v16, v32, v6, s[4:5] ; D2000010 00120D20 v_mad_f32 v0, 0.5, v16, -v7 ; D2820000 841E20F0 v_cndmask_b32_e64 v39, v24, v1, s[4:5] ; D2000827 00120318 v_mad_f32 v0, 0.5, v39, v0 ; D2820000 04024EF0 v_add_f32_e64 v0, |v15|, |v0| ; D2060300 0002010F v_add_f32_e64 v0, v0, |v45| ; D2060200 00025B00 v_mul_f32_e32 v1, -0.5, v39 ; 10024EF1 v_mad_f32 v1, v59, v46, v1 ; D2820001 04065D3B v_mad_f32 v1, v12, v46, v1 ; D2820001 04065D0C v_mul_f32_e32 v6, 0x3e800000, v13 ; 100C1AFF 3E800000 v_mad_f32 v6, -0.5, v16, v6 ; D2820006 041A20F1 v_mad_f32 v6, v40, v46, v6 ; D2820006 041A5D28 v_mad_f32 v12, 0.5, v14, -v7 ; D282000C 841E1CF0 v_mad_f32 v12, 0.5, v11, v12 ; D282000C 043216F0 v_add_f32_e64 v6, |v6|, |v12| ; D2060306 00021906 v_add_f32_e64 v1, |v1|, v6 ; D2060101 00020D01 v_subrev_f32_e32 v1, v0, v1 ; 0A020300 v_cmp_ge_f32_e64 s[4:5], v1, 0 ; D00C0004 00010101 v_cndmask_b32_e64 v0, 0, -1, s[4:5] ; D2000000 00118280 v_cmp_ne_i32_e64 s[4:5], v0, 0 ; D10A0004 00010100 v_cndmask_b32_e64 v6, v39, v11, s[4:5] ; D2000006 18121727 v_subrev_f32_e32 v15, v7, v6 ; 0A1E0D07 v_cndmask_b32_e64 v12, v16, v14, s[4:5] ; D200000C 00121D10 v_subrev_f32_e32 v40, v7, v12 ; 0A501907 v_sub_f32_e64 v13, |v40|, |v15| ; D208030D 00021F28 v_cmp_ge_f32_e64 s[6:7], v13, 0 ; D00C0006 0001010D v_cndmask_b32_e64 v0, 0, -1, s[6:7] ; D2000000 00198280 v_cmp_eq_i32_e64 s[6:7], v0, 0 ; D1040006 00010100 v_cndmask_b32_e64 v0, v9, v10, s[4:5] ; D2000000 08121509 v_xor_b32_e32 v0, 0x80000000, v0 ; 3A0000FF 80000000 v_mov_b32_e32 v45, 0x7fffffff ; 7E5A02FF 7FFFFFFF v_and_b32_e32 v59, v15, v45 ; 36765B0F v_and_b32_e32 v60, v40, v45 ; 36785B28 v_add_f32_e32 v14, v16, v14 ; 061C1D10 v_add_f32_e32 v14, v14, v39 ; 061C4F0E v_add_f32_e32 v11, v14, v11 ; 0616170E v_mad_f32 v11, v11, v46, -v7 ; D282000B 841E5D0B v_and_b32_e32 v11, v11, v45 ; 36165B0B s_and_saveexec_b64 s[4:5], s[6:7] ; BE842406 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_waitcnt expcnt(0) ; BF8C070F v_xor_b32_e32 v0, 0x80000000, v0 ; 3A0000FF 80000000 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v12, v12, v7 ; 06180F0C v_mul_f32_e32 v14, 0.5, v12 ; 101C18F0 v_add_f32_e32 v6, v6, v7 ; 060C0F06 v_mul_f32_e32 v6, 0.5, v6 ; 100C0CF0 v_mov_b32_e32 v12, 0 ; 7E180280 v_cmp_ge_f32_e64 s[4:5], v13, 0 ; D00C0004 0001010D v_cndmask_b32_e64 v13, 0, -1, s[4:5] ; D200080D 00118280 v_cmp_ne_i32_e64 s[4:5], v13, 0 ; D10A0004 0001010D v_cndmask_b32_e64 v6, v6, v14, s[4:5] ; D2000006 10121D06 v_subrev_f32_e32 v7, v6, v7 ; 0A0E0F06 v_cmp_ge_f32_e64 s[4:5], v7, 0 ; D00C0004 00010107 v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; D2000807 00118280 v_cmp_ne_i32_e64 s[4:5], v7, 0 ; D10A0004 00010107 v_cndmask_b32_e64 v7, 1.0, 0, s[4:5] ; D2000807 101100F2 v_mov_b32_e32 v61, 0x80000000 ; 7E7A02FF 80000000 v_xor_b32_e32 v13, v6, v61 ; 3A1A7B06 v_mul_f32_e64 v15, 0, s1 ; D210000F 00000280 v_cmp_ge_f32_e64 s[4:5], v1, 0 ; D00C0004 00010101 v_cndmask_b32_e64 v14, 0, -1, s[4:5] ; D200000E 00118280 v_cmp_ne_i32_e64 s[4:5], v14, 0 ; D10A0004 0001010E v_cndmask_b32_e64 v14, v10, v15, s[4:5] ; D200000E 10121F0A v_mul_f32_e32 v10, 0.5, v0 ; 101400F0 v_cndmask_b32_e64 v16, 0, v10, s[4:5] ; D2000010 00121480 v_add_f32_e32 v16, v3, v16 ; 06202103 v_subrev_f32_e32 v39, v14, v16 ; 0A4E210E v_cndmask_b32_e64 v15, v15, v9, s[4:5] ; D200080F 1812130F v_cndmask_b32_e64 v9, v10, 0, s[4:5] ; D2000809 1011010A v_add_f32_e32 v9, v2, v9 ; 06121302 v_subrev_f32_e32 v40, v15, v9 ; 0A50130F v_add_f32_e32 v45, v16, v14 ; 065A1D10 v_add_f32_e32 v46, v9, v15 ; 065C1F09 v_add_f32_e32 v9, v54, v58 ; 06127536 v_add_f32_e32 v9, v9, v50 ; 06126509 v_add_f32_e32 v9, v44, v9 ; 0612132C v_add_f32_e32 v10, v32, v37 ; 06144B20 v_add_f32_e32 v10, v10, v20 ; 0614290A v_add_f32_e32 v10, v10, v24 ; 0614310A v_add_f32_e32 v10, v10, v28 ; 0614390A v_add_f32_e32 v9, v9, v10 ; 06121509 v_rcp_f32_e32 v8, v8 ; 7E105508 v_mov_b32_e32 v62, 0xbe800000 ; 7E7C02FF BE800000 v_mad_f32 v8, v11, v8, v62 ; D2820008 04FA110B v_max_f32_e32 v8, 0, v8 ; 20101080 v_mul_f32_e32 v8, 0x3faaaaab, v8 ; 101010FF 3FAAAAAB v_min_f32_e32 v8, 0x3f400000, v8 ; 1E1010FF 3F400000 v_mul_f32_e32 v9, v9, v8 ; 10121109 v_add_f32_e32 v10, v52, v56 ; 06147134 v_add_f32_e32 v10, v10, v48 ; 0614610A v_add_f32_e32 v10, v42, v10 ; 0614152A v_add_f32_e32 v11, v30, v35 ; 0616471E v_add_f32_e32 v11, v11, v18 ; 0616250B v_add_f32_e32 v11, v11, v22 ; 06162D0B v_add_f32_e32 v11, v11, v26 ; 0616350B v_add_f32_e32 v10, v10, v11 ; 0614170A v_mul_f32_e32 v10, v10, v8 ; 1014110A v_add_f32_e32 v11, v51, v55 ; 06166F33 v_add_f32_e32 v11, v11, v47 ; 06165F0B v_add_f32_e32 v11, v41, v11 ; 06161729 v_add_f32_e32 v16, v29, v34 ; 0620451D v_add_f32_e32 v16, v16, v17 ; 06202310 v_add_f32_e32 v16, v16, v21 ; 06202B10 v_add_f32_e32 v16, v16, v25 ; 06203310 v_add_f32_e32 v11, v11, v16 ; 0616210B v_mul_f32_e32 v11, v11, v8 ; 1016110B v_add_f32_e32 v16, v53, v57 ; 06207335 v_add_f32_e32 v16, v16, v49 ; 06206310 v_add_f32_e32 v16, v43, v16 ; 0620212B v_add_f32_e32 v29, v31, v36 ; 063A491F v_add_f32_e32 v17, v29, v19 ; 0622271D v_add_f32_e32 v17, v17, v23 ; 06222F11 v_add_f32_e32 v17, v17, v27 ; 06223711 v_add_f32_e32 v16, v16, v17 ; 06202310 v_mul_f32_e32 v16, v16, v8 ; 10201110 v_xor_b32_e32 v19, v14, v61 ; 3A267B0E v_xor_b32_e32 v20, v15, v61 ; 3A287B0F v_cmp_lt_f32_e32 vcc, v33, v38 ; 7C024D21 v_cndmask_b32_e64 v21, 0, -1, vcc ; D2000815 01A98280 v_max_f32_e32 v17, v59, v60 ; 2022793B v_mul_f32_e32 v22, v62, v17 ; 102C233E s_mov_b64 s[4:5], 0 ; BE840480 v_mov_b32_e32 v31, v12 ; 7E3E030C v_mov_b32_e32 v17, v6 ; 7E220306 v_mov_b32_e32 v18, v6 ; 7E240306 v_mov_b32_e32 v32, v12 ; 7E40030C v_mov_b32_e32 v27, v40 ; 7E360328 v_mov_b32_e32 v28, v39 ; 7E380327 v_mov_b32_e32 v23, v46 ; 7E2E032E v_mov_b32_e32 v24, v45 ; 7E30032D v_cmp_gt_i32_e64 s[6:7], v32, 15 ; D1080006 00011F20 v_cndmask_b32_e64 v33, 0, -1, s[6:7] ; D2000821 00198280 v_cmp_eq_i32_e64 s[6:7], v33, 0 ; D1040006 00010121 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_14 ; BF880000 v_cmp_ge_f32_e64 s[20:21], -v31, 0 ; D00C0014 2001011F v_cndmask_b32_e64 v33, 0, -1, s[20:21] ; D2000821 00518280 v_cmp_ne_i32_e64 s[20:21], v33, 0 ; D10A0014 00010121 v_cndmask_b32_e64 v33, 0, 1.0, s[20:21] ; D2000821 0051E480 v_cmp_neq_f32_e64 s[20:21], v33, -v33 ; D01A0014 40024321 v_cndmask_b32_e64 v33, 0, -1, s[20:21] ; D2000821 00518280 v_cmp_eq_i32_e64 s[20:21], v21, 0 ; D1040014 00010115 s_and_saveexec_b64 s[20:21], s[20:21] ; BE942414 s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_21 ; BF880000 v_cmp_ne_i32_e64 s[22:23], v33, 0 ; D10A0016 00010121 v_mov_b32_e32 v34, v18 ; 7E440312 s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E v_mul_f32_e32 v29, 0, v27 ; 103A3680 image_sample_l v[34:35], 3, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[12:19], s[8:11] ; F0900300 0043221B v_mov_b32_e32 v36, 0x3ffb4a7d ; 7E4802FF 3FFB4A7D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v34, v36, v35, v34 ; D2820022 048A4724 s_or_b64 exec, exec, s[22:23] ; 88FE167E v_cmp_ge_f32_e64 s[22:23], -v12, 0 ; D00C0016 2001010C v_cndmask_b32_e64 v35, 0, -1, s[22:23] ; D2000823 00598280 v_cmp_ne_i32_e64 s[22:23], v35, 0 ; D10A0016 00010123 v_cndmask_b32_e64 v35, 0, 1.0, s[22:23] ; D2000823 0059E480 v_cmp_neq_f32_e64 s[22:23], v35, -v35 ; D01A0016 40024723 v_cndmask_b32_e64 v35, 0, -1, s[22:23] ; D2000823 00598280 v_cmp_ne_i32_e64 s[22:23], v35, 0 ; D10A0016 00010123 v_mov_b32_e32 v35, v17 ; 7E460311 s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E v_mul_f32_e32 v25, 0, v23 ; 10322E80 image_sample_l v[35:36], 3, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[12:19], s[8:11] ; F0900300 00432317 v_mov_b32_e32 v37, 0x3ffb4a7d ; 7E4A02FF 3FFB4A7D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v35, v37, v36, v35 ; D2820023 048E4925 s_or_b64 exec, exec, s[22:23] ; 88FE167E s_or_saveexec_b64 s[20:21], s[20:21] ; BE942514 s_xor_b64 exec, exec, s[20:21] ; 89FE147E s_cbranch_execz BB0_17 ; BF880000 v_cmp_ne_i32_e64 s[22:23], v33, 0 ; D10A0016 00010121 s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E v_mul_f32_e32 v29, 0, v27 ; 103A3680 image_sample_l v18, 8, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[12:19], s[8:11] ; F0900800 0043121B s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[22:23] ; 88FE167E v_cmp_ge_f32_e64 s[22:23], -v12, 0 ; D00C0016 2001010C v_cndmask_b32_e64 v33, 0, -1, s[22:23] ; D2000821 00598280 v_cmp_ne_i32_e64 s[22:23], v33, 0 ; D10A0016 00010121 v_cndmask_b32_e64 v33, 0, 1.0, s[22:23] ; D2000821 0059E480 v_cmp_neq_f32_e64 s[22:23], v33, -v33 ; D01A0016 40024321 v_cndmask_b32_e64 v33, 0, -1, s[22:23] ; D2000821 00598280 v_cmp_ne_i32_e64 s[22:23], v33, 0 ; D10A0016 00010121 s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E v_mul_f32_e32 v25, 0, v23 ; 10322E80 image_sample_l v17, 8, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[12:19], s[8:11] ; F0900800 00431117 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[22:23] ; 88FE167E v_mov_b32_e32 v34, v18 ; 7E440312 v_mov_b32_e32 v35, v17 ; 7E460311 s_or_b64 exec, exec, s[20:21] ; 88FE147E v_mov_b32_e32 v17, v35 ; 7E220323 v_mov_b32_e32 v18, v34 ; 7E240322 v_add_f32_e32 v33, v18, v13 ; 06421B12 v_add_f32_e64 v33, v22, |v33| ; D2060221 00024316 v_cmp_ge_f32_e64 s[20:21], v33, 0 ; D00C0014 00010121 v_cndmask_b32_e64 v33, 0, -1, s[20:21] ; D2000821 00518280 v_cmp_ne_i32_e64 s[20:21], v33, 0 ; D10A0014 00010121 v_cndmask_b32_e64 v33, 0, 1.0, s[20:21] ; D2000821 0051E480 v_add_f32_e32 v33, v33, v31 ; 06423F21 v_cmp_ge_f32_e64 s[20:21], -v33, 0 ; D00C0014 20010121 v_cndmask_b32_e64 v31, 0, -1, s[20:21] ; D200081F 00518280 v_cmp_ne_i32_e64 s[20:21], v31, 0 ; D10A0014 0001011F v_cndmask_b32_e64 v31, 1.0, 0, s[20:21] ; D200081F 105100F2 v_add_f32_e32 v34, v17, v13 ; 06441B11 v_add_f32_e64 v34, v22, |v34| ; D2060222 00024516 v_cmp_ge_f32_e64 s[20:21], v34, 0 ; D00C0014 00010122 v_cndmask_b32_e64 v34, 0, -1, s[20:21] ; D2000022 00518280 v_cmp_ne_i32_e64 s[20:21], v34, 0 ; D10A0014 00010122 v_cndmask_b32_e64 v34, 0, 1.0, s[20:21] ; D2000022 0051E480 v_add_f32_e32 v34, v34, v12 ; 06441922 v_cmp_ge_f32_e64 s[20:21], -v34, 0 ; D00C0014 20010122 v_cndmask_b32_e64 v12, 0, -1, s[20:21] ; D200000C 00518280 v_cmp_ne_i32_e64 s[20:21], v12, 0 ; D10A0014 0001010C v_cndmask_b32_e64 v12, 1.0, 0, s[20:21] ; D200000C 105100F2 v_mul_f32_e32 v35, v31, v12 ; 1046191F v_cmp_neq_f32_e64 s[20:21], v35, -v35 ; D01A0014 40024723 v_cndmask_b32_e64 v35, 0, -1, s[20:21] ; D2000823 00518280 v_cmp_eq_i32_e64 s[20:21], v35, 0 ; D1040014 00010123 s_and_saveexec_b64 s[20:21], s[20:21] ; BE942414 s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_7 ; BF880000 v_add_f32_e32 v35, v28, v19 ; 0646271C v_cmp_ge_f32_e64 s[22:23], -v33, 0 ; D00C0016 20010121 v_cndmask_b32_e64 v33, 0, -1, s[22:23] ; D2000821 00598280 v_cmp_ne_i32_e64 s[22:23], v33, 0 ; D10A0016 00010121 v_cndmask_b32_e64 v39, v28, v35, s[22:23] ; D2000827 005A471C v_add_f32_e32 v33, v27, v20 ; 0642291B v_cndmask_b32_e64 v40, v27, v33, s[22:23] ; D2000028 185A431B v_add_f32_e32 v33, v24, v14 ; 06421D18 v_cmp_ge_f32_e64 s[22:23], -v34, 0 ; D00C0016 20010122 v_cndmask_b32_e64 v34, 0, -1, s[22:23] ; D2000022 00598280 v_cmp_ne_i32_e64 s[22:23], v34, 0 ; D10A0016 00010122 v_cndmask_b32_e64 v45, v24, v33, s[22:23] ; D200082D 005A4318 v_add_f32_e32 v33, v23, v15 ; 06421F17 v_cndmask_b32_e64 v46, v23, v33, s[22:23] ; D200002E 185A4317 v_add_i32_e32 v32, 1, v32 ; 4A404081 s_or_b64 exec, exec, s[20:21] ; 88FE147E s_or_b64 s[4:5], s[20:21], s[4:5] ; 88840414 s_or_b64 exec, exec, s[6:7] ; 88FE067E s_or_b64 s[4:5], s[6:7], s[4:5] ; 88840406 s_andn2_b64 exec, exec, s[4:5] ; 8AFE047E s_cbranch_execnz BB0_6 ; BF890000 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_subrev_f32_e32 v12, v27, v2 ; 0A18051B v_subrev_f32_e32 v13, v28, v3 ; 0A1A071C v_cmp_ge_f32_e64 s[4:5], v1, 0 ; D00C0004 00010101 v_mov_b32_e32 v21, 0 ; 7E2A0280 v_cndmask_b32_e64 v1, 0, -1, s[4:5] ; D2000801 00118280 v_cmp_ne_i32_e64 s[4:5], v1, 0 ; D10A0004 00010101 v_cndmask_b32_e64 v1, v13, v12, s[4:5] ; D2000801 0812190D v_subrev_f32_e32 v12, v2, v23 ; 0A182F02 v_subrev_f32_e32 v13, v3, v24 ; 0A1A3103 v_cndmask_b32_e64 v12, v13, v12, s[4:5] ; D200000C 0812190D v_min_f32_e32 v13, v1, v12 ; 1E1A1901 v_add_f32_e32 v14, v12, v1 ; 061C030C v_rcp_f32_e32 v14, v14 ; 7E1C550E v_mad_f32 v13, -v13, v14, 0.5 ; D282000D 23C21D0D v_subrev_f32_e32 v1, v12, v1 ; 0A02030C v_cmp_ge_f32_e64 s[6:7], v1, 0 ; D00C0006 00010101 v_cndmask_b32_e64 v1, 0, -1, s[6:7] ; D2000801 00198280 v_cmp_ne_i32_e64 s[6:7], v1, 0 ; D10A0006 00010101 v_cndmask_b32_e64 v1, v18, v17, s[6:7] ; D2000801 101A2312 v_subrev_f32_e32 v1, v6, v1 ; 0A020306 v_cmp_ge_f32_e64 s[6:7], v1, 0 ; D00C0006 00010101 v_cndmask_b32_e64 v1, 0, -1, s[6:7] ; D2000801 00198280 v_cmp_ne_i32_e64 s[6:7], v1, 0 ; D10A0006 00010101 v_cndmask_b32_e64 v1, 1.0, 0, s[6:7] ; D2000801 101900F2 v_subrev_f32_e32 v1, v1, v7 ; 0A020F01 v_cmp_ge_f32_e64 s[6:7], -|v1|, 0 ; D00C0106 20010101 v_cndmask_b32_e64 v1, 0, -1, s[6:7] ; D2000801 00198280 v_cmp_ne_i32_e64 s[6:7], v1, 0 ; D10A0006 00010101 v_cndmask_b32_e64 v0, v0, 0, s[6:7] ; D2000000 00190100 v_mul_f32_e32 v0, v13, v0 ; 1000010D v_cndmask_b32_e64 v1, 0, v0, s[4:5] ; D2000801 00120080 v_add_f32_e32 v20, v3, v1 ; 06280303 v_cndmask_b32_e64 v0, v0, 0, s[4:5] ; D2000000 00110100 v_add_f32_e32 v19, v2, v0 ; 06260102 image_sample_l v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[12:19], s[8:11] ; F0900F00 00430C13 v_mov_b32_e32 v0, 0x3de38e39 ; 7E0002FF 3DE38E39 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v9, v0, v15 ; D2820001 043E0109 v_mad_f32 v20, -v8, v15, v1 ; D2820014 24061F08 v_mad_f32 v1, v16, v0, v14 ; D2820001 043A0110 v_mad_f32 v19, -v8, v14, v1 ; D2820013 24061D08 v_mad_f32 v1, v10, v0, v13 ; D2820001 0436010A v_mad_f32 v18, -v8, v13, v1 ; D2820012 24061B08 v_mad_f32 v0, v11, v0, v12 ; D2820000 0432010B v_mad_f32 v17, -v8, v12, v0 ; D2820011 24021908 s_or_b64 exec, exec, s[2:3] ; 88FE027E v_max3_f32 v0, v17, v18, v19 ; D2A80000 044E2511 v_mul_f32_e32 v0, 4.0, v0 ; 100000F6 v_max_f32_e32 v0, v0, v20 ; 20002900 v_add_f32_e32 v1, -0.5, v2 ; 060204F1 v_sub_f32_e64 v1, 0.5, |v1| ; D2080201 000202F0 v_mul_f32_e32 v1, 0x41a00000, v1 ; 100202FF 41A00000 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_cvt_pkrtz_f16_f32_e32 v0, v19, v0 ; 5E000113 v_cvt_pkrtz_f16_f32_e32 v1, v17, v18 ; 5E022511 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000