SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v0, v1 ; 5E080300 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 exp 15, 0, 1, 1, 1, v4, v0, v4, v0 ; F8001C0F 00040004 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[2].xyxx 5: MOV OUT[2], TEMP[1] 6: MOV OUT[1], IN[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = fmul float %13, %33 %52 = fmul float %14, %33 %53 = fmul float %15, %33 %54 = fmul float %16, %33 %55 = fmul float %17, %34 %56 = fadd float %55, %51 %57 = fmul float %18, %34 %58 = fadd float %57, %52 %59 = fmul float %19, %34 %60 = fadd float %59, %53 %61 = fmul float %20, %34 %62 = fadd float %61, %54 %63 = fmul float %21, %35 %64 = fadd float %63, %56 %65 = fmul float %22, %35 %66 = fadd float %65, %58 %67 = fmul float %23, %35 %68 = fadd float %67, %60 %69 = fmul float %24, %35 %70 = fadd float %69, %62 %71 = fmul float %25, %36 %72 = fadd float %71, %64 %73 = fmul float %26, %36 %74 = fadd float %73, %66 %75 = fmul float %27, %36 %76 = fadd float %75, %68 %77 = fmul float %28, %36 %78 = fadd float %77, %70 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %72, float %74, float %76, float %78) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[8:11], 0 idxen ; E00C2000 80020A00 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xb ; C206810B s_buffer_load_dword s14, s[0:3], 0xc ; C207010C s_buffer_load_dword s15, s[0:3], 0xd ; C207810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v0, s16, v2 ; 10000410 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 32, 0, 0, 0, v6, v7, v8, v9 ; F800020F 09080706 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, s6, v3, v0 ; D2820000 04020606 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v6, s17, v2 ; 100C0411 v_mad_f32 v6, s7, v3, v6 ; D2820006 041A0607 v_mul_f32_e32 v7, s4, v2 ; 100E0404 v_mad_f32 v7, s8, v3, v7 ; D2820007 041E0608 v_mul_f32_e32 v8, s5, v2 ; 10100405 v_mad_f32 v8, s9, v3, v8 ; D2820008 04220609 v_mad_f32 v0, s10, v4, v0 ; D2820000 0402080A v_mad_f32 v6, s11, v4, v6 ; D2820006 041A080B v_mad_f32 v7, s12, v4, v7 ; D2820007 041E080C v_mad_f32 v8, s13, v4, v8 ; D2820008 0422080D v_mad_f32 v0, s14, v5, v0 ; D2820000 04020A0E v_mad_f32 v6, s15, v5, v6 ; D2820006 041A0A0F v_mad_f32 v7, s18, v5, v7 ; D2820007 041E0A12 v_mad_f32 v2, s0, v5, v8 ; D2820002 04220A00 exp 15, 33, 0, 0, 0, v10, v11, v1, v1 ; F800021F 01010B0A exp 15, 12, 0, 1, 0, v0, v6, v7, v2 ; F80008CF 02070600 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[0], IN[0], TEMP[0] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = fmul float %26, %37 %42 = fmul float %27, %38 %43 = fmul float %28, %39 %44 = fmul float %29, %40 %45 = call i32 @llvm.SI.packf16(float %41, float %42) %46 = bitcast i32 %45 to float %47 = call i32 @llvm.SI.packf16(float %43, float %44) %48 = bitcast i32 %47 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %46, float %48, float %46, float %48) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800F00 00010606 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v2 ; 10000506 v_mul_f32_e32 v1, v7, v3 ; 10020707 v_mul_f32_e32 v2, v8, v4 ; 10040908 v_mul_f32_e32 v3, v9, v5 ; 10060B09 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 116 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %33, float %34, float %35, float %36) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 60 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[2].xyxx 5: MOV OUT[2], TEMP[1] 6: MOV OUT[1], IN[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = fmul float %13, %33 %52 = fmul float %14, %33 %53 = fmul float %15, %33 %54 = fmul float %16, %33 %55 = fmul float %17, %34 %56 = fadd float %55, %51 %57 = fmul float %18, %34 %58 = fadd float %57, %52 %59 = fmul float %19, %34 %60 = fadd float %59, %53 %61 = fmul float %20, %34 %62 = fadd float %61, %54 %63 = fmul float %21, %35 %64 = fadd float %63, %56 %65 = fmul float %22, %35 %66 = fadd float %65, %58 %67 = fmul float %23, %35 %68 = fadd float %67, %60 %69 = fmul float %24, %35 %70 = fadd float %69, %62 %71 = fmul float %25, %36 %72 = fadd float %71, %64 %73 = fmul float %26, %36 %74 = fadd float %73, %66 %75 = fmul float %27, %36 %76 = fadd float %75, %68 %77 = fmul float %28, %36 %78 = fadd float %77, %70 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %72, float %74, float %76, float %78) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[8:11], 0 idxen ; E00C2000 80020A00 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xb ; C206810B s_buffer_load_dword s14, s[0:3], 0xc ; C207010C s_buffer_load_dword s15, s[0:3], 0xd ; C207810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v0, s16, v2 ; 10000410 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 32, 0, 0, 0, v6, v7, v8, v9 ; F800020F 09080706 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, s6, v3, v0 ; D2820000 04020606 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v6, s17, v2 ; 100C0411 v_mad_f32 v6, s7, v3, v6 ; D2820006 041A0607 v_mul_f32_e32 v7, s4, v2 ; 100E0404 v_mad_f32 v7, s8, v3, v7 ; D2820007 041E0608 v_mul_f32_e32 v8, s5, v2 ; 10100405 v_mad_f32 v8, s9, v3, v8 ; D2820008 04220609 v_mad_f32 v0, s10, v4, v0 ; D2820000 0402080A v_mad_f32 v6, s11, v4, v6 ; D2820006 041A080B v_mad_f32 v7, s12, v4, v7 ; D2820007 041E080C v_mad_f32 v8, s13, v4, v8 ; D2820008 0422080D v_mad_f32 v0, s14, v5, v0 ; D2820000 04020A0E v_mad_f32 v6, s15, v5, v6 ; D2820006 041A0A0F v_mad_f32 v7, s18, v5, v7 ; D2820007 041E0A12 v_mad_f32 v2, s0, v5, v8 ; D2820002 04220A00 exp 15, 33, 0, 0, 0, v10, v11, v1, v1 ; F800021F 01010B0A exp 15, 12, 0, 1, 0, v0, v6, v7, v2 ; F80008CF 02070600 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[0], IN[0], TEMP[0] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = fmul float %26, %37 %42 = fmul float %27, %38 %43 = fmul float %28, %39 %44 = fmul float %29, %40 %45 = call i32 @llvm.SI.packf16(float %41, float %42) %46 = bitcast i32 %45 to float %47 = call i32 @llvm.SI.packf16(float %43, float %44) %48 = bitcast i32 %47 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %46, float %48, float %46, float %48) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800F00 00010606 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v2 ; 10000506 v_mul_f32_e32 v1, v7, v3 ; 10020707 v_mul_f32_e32 v2, v8, v4 ; 10040908 v_mul_f32_e32 v3, v9, v5 ; 10060B09 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 116 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..3] DCL TEMP[0], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IN[1] 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %13, %33 %46 = fmul float %14, %33 %47 = fmul float %15, %33 %48 = fmul float %16, %33 %49 = fmul float %17, %34 %50 = fadd float %49, %45 %51 = fmul float %18, %34 %52 = fadd float %51, %46 %53 = fmul float %19, %34 %54 = fadd float %53, %47 %55 = fmul float %20, %34 %56 = fadd float %55, %48 %57 = fmul float %21, %35 %58 = fadd float %57, %50 %59 = fmul float %22, %35 %60 = fadd float %59, %52 %61 = fmul float %23, %35 %62 = fadd float %61, %54 %63 = fmul float %24, %35 %64 = fadd float %63, %56 %65 = fmul float %25, %36 %66 = fadd float %65, %58 %67 = fmul float %26, %36 %68 = fadd float %67, %60 %69 = fmul float %27, %36 %70 = fadd float %69, %62 %71 = fmul float %28, %36 %72 = fadd float %71, %64 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %66, float %68, float %70, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 v_mad_f32 v0, s8, v2, v0 ; D2820000 04020408 v_mul_f32_e32 v9, s5, v1 ; 10120205 v_mad_f32 v9, s9, v2, v9 ; D2820009 04260409 v_mul_f32_e32 v10, s6, v1 ; 10140206 v_mad_f32 v10, s10, v2, v10 ; D282000A 042A040A v_mul_f32_e32 v11, s7, v1 ; 10160207 v_mad_f32 v11, s11, v2, v11 ; D282000B 042E040B v_mad_f32 v0, s12, v3, v0 ; D2820000 0402060C v_mad_f32 v9, s13, v3, v9 ; D2820009 0426060D v_mad_f32 v10, s14, v3, v10 ; D282000A 042A060E v_mad_f32 v11, s15, v3, v11 ; D282000B 042E060F v_mad_f32 v0, s16, v4, v0 ; D2820000 04020810 v_mad_f32 v9, s17, v4, v9 ; D2820009 04260811 v_mad_f32 v10, s18, v4, v10 ; D282000A 042A0812 v_mad_f32 v1, s0, v4, v11 ; D2820001 042E0800 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 12, 0, 1, 0, v0, v9, v10, v1 ; F80008CF 010A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 240 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e32 v1, v4, v5 ; 5E020B04 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[4], IN[1].xxxx 2: MAD TEMP[1], CONST[5], IN[1].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[6], IN[1].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[7], IN[1].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: MUL TEMP[1], CONST[0], IN[0].xxxx 7: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 9: MAD TEMP[1], CONST[3], IN[0].wwww, TEMP[1] 10: MOV OUT[0], TEMP[1] 11: MOV OUT[1], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %29, %49 %54 = fmul float %30, %49 %55 = fmul float %31, %50 %56 = fadd float %55, %53 %57 = fmul float %32, %50 %58 = fadd float %57, %54 %59 = fmul float %33, %51 %60 = fadd float %59, %56 %61 = fmul float %34, %51 %62 = fadd float %61, %58 %63 = fmul float %35, %52 %64 = fadd float %63, %60 %65 = fmul float %36, %52 %66 = fadd float %65, %62 %67 = fmul float %13, %41 %68 = fmul float %14, %41 %69 = fmul float %15, %41 %70 = fmul float %16, %41 %71 = fmul float %17, %42 %72 = fadd float %71, %67 %73 = fmul float %18, %42 %74 = fadd float %73, %68 %75 = fmul float %19, %42 %76 = fadd float %75, %69 %77 = fmul float %20, %42 %78 = fadd float %77, %70 %79 = fmul float %21, %43 %80 = fadd float %79, %72 %81 = fmul float %22, %43 %82 = fadd float %81, %74 %83 = fmul float %23, %43 %84 = fadd float %83, %76 %85 = fmul float %24, %43 %86 = fadd float %85, %78 %87 = fmul float %25, %44 %88 = fadd float %87, %80 %89 = fmul float %26, %44 %90 = fadd float %89, %82 %91 = fmul float %27, %44 %92 = fadd float %91, %84 %93 = fmul float %28, %44 %94 = fadd float %93, %86 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %64, float %66, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %88, float %90, float %92, float %94) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0xf ; C206010F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_buffer_load_dword s7, s[0:3], 0x15 ; C2038115 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104 s_buffer_load_dword s14, s[0:3], 0x5 ; C2070105 s_buffer_load_dword s15, s[0:3], 0x6 ; C2078106 s_buffer_load_dword s16, s[0:3], 0x7 ; C2080107 s_buffer_load_dword s17, s[0:3], 0x8 ; C2088108 s_buffer_load_dword s18, s[0:3], 0x9 ; C2090109 s_buffer_load_dword s19, s[0:3], 0x18 ; C2098118 s_buffer_load_dword s20, s[0:3], 0x19 ; C20A0119 s_buffer_load_dword s21, s[0:3], 0x1c ; C20A811C s_buffer_load_dword s22, s[0:3], 0x1d ; C20B011D s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s8, v1 ; 10000208 v_mul_f32_e32 v9, s4, v5 ; 10120A04 v_mad_f32 v9, s6, v6, v9 ; D2820009 04260C06 v_mul_f32_e32 v10, s5, v5 ; 10140A05 v_mad_f32 v10, s7, v6, v10 ; D282000A 042A0C07 v_mad_f32 v9, s19, v7, v9 ; D2820009 04260E13 v_mad_f32 v10, s20, v7, v10 ; D282000A 042A0E14 v_mad_f32 v9, s21, v8, v9 ; D2820009 04261015 v_mad_f32 v5, s22, v8, v10 ; D2820005 042A1016 v_mad_f32 v0, s13, v2, v0 ; D2820000 0402040D v_mul_f32_e32 v6, s9, v1 ; 100C0209 v_mad_f32 v6, s14, v2, v6 ; D2820006 041A040E v_mul_f32_e32 v7, s10, v1 ; 100E020A v_mad_f32 v7, s15, v2, v7 ; D2820007 041E040F v_mul_f32_e32 v8, s11, v1 ; 1010020B v_mad_f32 v8, s16, v2, v8 ; D2820008 04220410 v_mad_f32 v0, s17, v3, v0 ; D2820000 04020611 v_mad_f32 v6, s18, v3, v6 ; D2820006 041A0612 v_mad_f32 v7, s23, v3, v7 ; D2820007 041E0617 v_mad_f32 v8, s24, v3, v8 ; D2820008 04220618 v_mad_f32 v0, s25, v4, v0 ; D2820000 04020819 v_mad_f32 v6, s26, v4, v6 ; D2820006 041A081A v_mad_f32 v7, s0, v4, v7 ; D2820007 041E0800 v_mad_f32 v1, s12, v4, v8 ; D2820001 0422080C v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v9, v5, v2, v2 ; F800020F 02020509 exp 15, 12, 0, 1, 0, v0, v6, v7, v1 ; F80008CF 01070600 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 328 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[1..2] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[2].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MUL TEMP[1].xyz, TEMP[0].xyzz, CONST[1].xyzz 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = bitcast float %32 to i32 %35 = bitcast float %33 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %29, <16 x i8> %31, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = fmul float %42, %27 %44 = fmul float %39, %24 %45 = fmul float %40, %25 %46 = fmul float %41, %26 %47 = call i32 @llvm.SI.packf16(float %44, float %45) %48 = bitcast i32 %47 to float %49 = call i32 @llvm.SI.packf16(float %46, float %43) %50 = bitcast i32 %49 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %48, float %50, float %48, float %50) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s0, s[0:3], 0x8 ; C2000108 image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430002 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mul_f32_e32 v5, s5, v1 ; 100A0205 v_mul_f32_e32 v6, s6, v2 ; 100C0406 v_mul_f32_e32 v0, s0, v3 ; 10000600 v_cvt_pkrtz_f16_f32_e32 v1, v4, v5 ; 5E020B04 v_cvt_pkrtz_f16_f32_e32 v0, v6, v0 ; 5E000106 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL TEMP[0], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %13, %33 %38 = fmul float %14, %33 %39 = fmul float %15, %33 %40 = fmul float %16, %33 %41 = fmul float %17, %34 %42 = fadd float %41, %37 %43 = fmul float %18, %34 %44 = fadd float %43, %38 %45 = fmul float %19, %34 %46 = fadd float %45, %39 %47 = fmul float %20, %34 %48 = fadd float %47, %40 %49 = fmul float %21, %35 %50 = fadd float %49, %42 %51 = fmul float %22, %35 %52 = fadd float %51, %44 %53 = fmul float %23, %35 %54 = fadd float %53, %46 %55 = fmul float %24, %35 %56 = fadd float %55, %48 %57 = fmul float %25, %36 %58 = fadd float %57, %50 %59 = fmul float %26, %36 %60 = fadd float %59, %52 %61 = fmul float %27, %36 %62 = fadd float %61, %54 %63 = fmul float %28, %36 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mad_f32 v4, s8, v1, v4 ; D2820004 04120208 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mad_f32 v5, s9, v1, v5 ; D2820005 04160209 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mad_f32 v6, s10, v1, v6 ; D2820006 041A020A v_mul_f32_e32 v7, s7, v0 ; 100E0007 v_mad_f32 v7, s11, v1, v7 ; D2820007 041E020B v_mad_f32 v4, s12, v2, v4 ; D2820004 0412040C v_mad_f32 v5, s13, v2, v5 ; D2820005 0416040D v_mad_f32 v6, s14, v2, v6 ; D2820006 041A040E v_mad_f32 v7, s15, v2, v7 ; D2820007 041E040F v_mad_f32 v4, s16, v3, v4 ; D2820004 04120610 v_mad_f32 v5, s17, v3, v5 ; D2820005 04160611 v_mad_f32 v6, s18, v3, v6 ; D2820006 041A0612 v_mad_f32 v0, s0, v3, v7 ; D2820000 041E0600 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 220 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 1.0, 1.0 ; D25E0000 0001E4F2 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 20 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[4], IN[2].xxxx 2: MAD TEMP[1], CONST[5], IN[2].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[6], IN[2].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[7], IN[2].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: MUL TEMP[1], CONST[0], IN[0].xxxx 7: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 9: MAD TEMP[1], CONST[3], IN[0].wwww, TEMP[1] 10: MOV OUT[2], IN[1] 11: MOV OUT[0], TEMP[1] 12: MOV OUT[1], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %29, %57 %62 = fmul float %30, %57 %63 = fmul float %31, %58 %64 = fadd float %63, %61 %65 = fmul float %32, %58 %66 = fadd float %65, %62 %67 = fmul float %33, %59 %68 = fadd float %67, %64 %69 = fmul float %34, %59 %70 = fadd float %69, %66 %71 = fmul float %35, %60 %72 = fadd float %71, %68 %73 = fmul float %36, %60 %74 = fadd float %73, %70 %75 = fmul float %13, %41 %76 = fmul float %14, %41 %77 = fmul float %15, %41 %78 = fmul float %16, %41 %79 = fmul float %17, %42 %80 = fadd float %79, %75 %81 = fmul float %18, %42 %82 = fadd float %81, %76 %83 = fmul float %19, %42 %84 = fadd float %83, %77 %85 = fmul float %20, %42 %86 = fadd float %85, %78 %87 = fmul float %21, %43 %88 = fadd float %87, %80 %89 = fmul float %22, %43 %90 = fadd float %89, %82 %91 = fmul float %23, %43 %92 = fadd float %91, %84 %93 = fmul float %24, %43 %94 = fadd float %93, %86 %95 = fmul float %25, %44 %96 = fadd float %95, %88 %97 = fmul float %26, %44 %98 = fadd float %97, %90 %99 = fmul float %27, %44 %100 = fadd float %99, %92 %101 = fmul float %28, %44 %102 = fadd float %101, %94 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %72, float %74, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %98, float %100, float %102) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0xf ; C208010F s_buffer_load_dword s17, s[0:3], 0x10 ; C2088110 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_buffer_load_dword s6, s[0:3], 0x15 ; C2030115 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s9, s[0:3], 0x2 ; C2048102 s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103 s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 s_buffer_load_dword s14, s[0:3], 0x1c ; C207011C s_buffer_load_dword s15, s[0:3], 0x1d ; C207811D s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105 s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106 s_buffer_load_dword s20, s[0:3], 0x7 ; C20A0107 s_buffer_load_dword s21, s[0:3], 0x8 ; C20A8108 s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109 s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s7, v1 ; 10000207 v_mad_f32 v0, s11, v2, v0 ; D2820000 0402040B v_mul_f32_e32 v13, s17, v9 ; 101A1211 v_mad_f32 v13, s5, v10, v13 ; D282000D 04361405 v_mul_f32_e32 v14, s4, v9 ; 101C1204 v_mad_f32 v14, s6, v10, v14 ; D282000E 043A1406 v_mad_f32 v13, s12, v11, v13 ; D282000D 0436160C v_mad_f32 v14, s13, v11, v14 ; D282000E 043A160D v_mad_f32 v13, s14, v12, v13 ; D282000D 0436180E v_mad_f32 v9, s15, v12, v14 ; D2820009 043A180F v_mul_f32_e32 v10, s8, v1 ; 10140208 v_mad_f32 v10, s18, v2, v10 ; D282000A 042A0412 v_mul_f32_e32 v11, s9, v1 ; 10160209 v_mad_f32 v11, s19, v2, v11 ; D282000B 042E0413 v_mul_f32_e32 v12, s10, v1 ; 1018020A v_mad_f32 v12, s20, v2, v12 ; D282000C 04320414 v_mad_f32 v0, s21, v3, v0 ; D2820000 04020615 v_mad_f32 v10, s22, v3, v10 ; D282000A 042A0616 v_mad_f32 v11, s23, v3, v11 ; D282000B 042E0617 v_mad_f32 v12, s24, v3, v12 ; D282000C 04320618 v_mad_f32 v0, s25, v4, v0 ; D2820000 04020819 v_mad_f32 v10, s26, v4, v10 ; D282000A 042A081A v_mad_f32 v11, s0, v4, v11 ; D282000B 042E0800 v_mad_f32 v1, s16, v4, v12 ; D2820001 04320810 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v13, v9, v2, v2 ; F800020F 0202090D exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 exp 15, 12, 0, 1, 0, v0, v10, v11, v1 ; F80008CF 010B0A00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 348 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[1..3] DCL TEMP[0..2], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[3].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MAD TEMP[2].xyz, IN[1].xyzz, CONST[2].xyzz, CONST[1].xyzz 5: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 6: MOV OUT[0], TEMP[1] 7: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %40 = bitcast float %35 to i32 %41 = bitcast float %36 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %32, <16 x i8> %34, i32 2) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = fmul float %48, %30 %50 = fmul float %37, %27 %51 = fadd float %50, %24 %52 = fmul float %38, %28 %53 = fadd float %52, %25 %54 = fmul float %39, %29 %55 = fadd float %54, %26 %56 = fmul float %45, %51 %57 = fmul float %46, %53 %58 = fmul float %47, %55 %59 = call i32 @llvm.SI.packf16(float %56, float %57) %60 = bitcast i32 %59 to float %61 = call i32 @llvm.SI.packf16(float %58, float %49) %62 = bitcast i32 %61 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %60, float %62, float %60, float %62) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s12, s[0:3], 0x6 ; C2060106 s_buffer_load_dword s13, s[0:3], 0x8 ; C2068108 s_buffer_load_dword s14, s[0:3], 0x9 ; C2070109 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s0, s[0:3], 0xa ; C200010A v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440002 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, s6, v3 ; 100E0606 v_mov_b32_e32 v8, s4 ; 7E100204 v_mad_f32 v4, s13, v4, v8 ; D2820004 0422080D v_mov_b32_e32 v8, s5 ; 7E100205 v_mad_f32 v5, s14, v5, v8 ; D2820005 04220A0E v_mov_b32_e32 v8, s12 ; 7E10020C v_mad_f32 v6, s0, v6, v8 ; D2820006 04220C00 v_mul_f32_e32 v4, v4, v0 ; 10080104 v_mul_f32_e32 v5, v5, v1 ; 100A0305 v_mul_f32_e32 v0, v6, v2 ; 10000506 v_cvt_pkrtz_f16_f32_e32 v1, v4, v5 ; 5E020B04 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 180 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[4], IN[2].xxxx 2: MAD TEMP[1], CONST[5], IN[2].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[6], IN[2].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[7], IN[2].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: MUL TEMP[1], CONST[0], IN[0].xxxx 7: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 9: MAD TEMP[1], CONST[3], IN[0].wwww, TEMP[1] 10: MOV OUT[2], IN[1] 11: MOV OUT[0], TEMP[1] 12: MOV OUT[1], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %29, %57 %62 = fmul float %30, %57 %63 = fmul float %31, %58 %64 = fadd float %63, %61 %65 = fmul float %32, %58 %66 = fadd float %65, %62 %67 = fmul float %33, %59 %68 = fadd float %67, %64 %69 = fmul float %34, %59 %70 = fadd float %69, %66 %71 = fmul float %35, %60 %72 = fadd float %71, %68 %73 = fmul float %36, %60 %74 = fadd float %73, %70 %75 = fmul float %13, %41 %76 = fmul float %14, %41 %77 = fmul float %15, %41 %78 = fmul float %16, %41 %79 = fmul float %17, %42 %80 = fadd float %79, %75 %81 = fmul float %18, %42 %82 = fadd float %81, %76 %83 = fmul float %19, %42 %84 = fadd float %83, %77 %85 = fmul float %20, %42 %86 = fadd float %85, %78 %87 = fmul float %21, %43 %88 = fadd float %87, %80 %89 = fmul float %22, %43 %90 = fadd float %89, %82 %91 = fmul float %23, %43 %92 = fadd float %91, %84 %93 = fmul float %24, %43 %94 = fadd float %93, %86 %95 = fmul float %25, %44 %96 = fadd float %95, %88 %97 = fmul float %26, %44 %98 = fadd float %97, %90 %99 = fmul float %27, %44 %100 = fadd float %99, %92 %101 = fmul float %28, %44 %102 = fadd float %101, %94 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %72, float %74, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %98, float %100, float %102) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0xf ; C208010F s_buffer_load_dword s17, s[0:3], 0x10 ; C2088110 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_buffer_load_dword s6, s[0:3], 0x15 ; C2030115 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s9, s[0:3], 0x2 ; C2048102 s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103 s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 s_buffer_load_dword s14, s[0:3], 0x1c ; C207011C s_buffer_load_dword s15, s[0:3], 0x1d ; C207811D s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105 s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106 s_buffer_load_dword s20, s[0:3], 0x7 ; C20A0107 s_buffer_load_dword s21, s[0:3], 0x8 ; C20A8108 s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109 s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s7, v1 ; 10000207 v_mad_f32 v0, s11, v2, v0 ; D2820000 0402040B v_mul_f32_e32 v13, s17, v9 ; 101A1211 v_mad_f32 v13, s5, v10, v13 ; D282000D 04361405 v_mul_f32_e32 v14, s4, v9 ; 101C1204 v_mad_f32 v14, s6, v10, v14 ; D282000E 043A1406 v_mad_f32 v13, s12, v11, v13 ; D282000D 0436160C v_mad_f32 v14, s13, v11, v14 ; D282000E 043A160D v_mad_f32 v13, s14, v12, v13 ; D282000D 0436180E v_mad_f32 v9, s15, v12, v14 ; D2820009 043A180F v_mul_f32_e32 v10, s8, v1 ; 10140208 v_mad_f32 v10, s18, v2, v10 ; D282000A 042A0412 v_mul_f32_e32 v11, s9, v1 ; 10160209 v_mad_f32 v11, s19, v2, v11 ; D282000B 042E0413 v_mul_f32_e32 v12, s10, v1 ; 1018020A v_mad_f32 v12, s20, v2, v12 ; D282000C 04320414 v_mad_f32 v0, s21, v3, v0 ; D2820000 04020615 v_mad_f32 v10, s22, v3, v10 ; D282000A 042A0616 v_mad_f32 v11, s23, v3, v11 ; D282000B 042E0617 v_mad_f32 v12, s24, v3, v12 ; D282000C 04320618 v_mad_f32 v0, s25, v4, v0 ; D2820000 04020819 v_mad_f32 v10, s26, v4, v10 ; D282000A 042A081A v_mad_f32 v11, s0, v4, v11 ; D282000B 042E0800 v_mad_f32 v1, s16, v4, v12 ; D2820001 04320810 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v13, v9, v2, v2 ; F800020F 0202090D exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 exp 15, 12, 0, 1, 0, v0, v10, v11, v1 ; F80008CF 010B0A00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 348 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[2..5] DCL TEMP[0..2], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[5].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MAD TEMP[2].xyz, IN[1].xyzz, CONST[3].xyzz, CONST[2].xyzz 5: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 6: MOV TEMP[0].xy, IN[0].xyyy 7: TEX TEMP[0].xyz, TEMP[0], SAMP[1], 2D 8: MAD TEMP[1].xyz, TEMP[0].xyzz, CONST[4].xyzz, TEMP[1].xyzz 9: MOV OUT[0], TEMP[1] 10: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %34 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %35 = load <32 x i8>, <32 x i8> addrspace(2)* %34, align 32, !tbaa !0 %36 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %39 = bitcast <8 x i32> addrspace(2)* %38 to <32 x i8> addrspace(2)* %40 = load <32 x i8>, <32 x i8> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %42 = bitcast <4 x i32> addrspace(2)* %41 to <16 x i8> addrspace(2)* %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 %44 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %49 = bitcast float %44 to i32 %50 = bitcast float %45 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %35, <16 x i8> %37, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul float %57, %33 %59 = fmul float %46, %27 %60 = fadd float %59, %24 %61 = fmul float %47, %28 %62 = fadd float %61, %25 %63 = fmul float %48, %29 %64 = fadd float %63, %26 %65 = fmul float %54, %60 %66 = fmul float %55, %62 %67 = fmul float %56, %64 %68 = bitcast float %44 to i32 %69 = bitcast float %45 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %71, <32 x i8> %40, <16 x i8> %43, i32 2) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = fmul float %73, %30 %77 = fadd float %76, %65 %78 = fmul float %74, %31 %79 = fadd float %78, %66 %80 = fmul float %75, %32 %81 = fadd float %80, %67 %82 = call i32 @llvm.SI.packf16(float %77, float %79) %83 = bitcast i32 %82 to float %84 = call i32 @llvm.SI.packf16(float %81, float %58) %85 = bitcast i32 %84 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %83, float %85, float %83, float %85) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s11, s[0:3], 0xc ; C205810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0x10 ; C2070110 s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111 s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112 s_buffer_load_dword s0, s[0:3], 0x14 ; C2000114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 v_mov_b32_e32 v1, s9 ; 7E020209 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 v_mov_b32_e32 v7, s10 ; 7E0E020A s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 v_mad_f32 v0, s11, v4, v0 ; D2820000 0402080B v_mad_f32 v1, s12, v5, v1 ; D2820001 04060A0C v_mad_f32 v4, s13, v6, v7 ; D2820004 041E0C0D s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[20:23] ; F0800F00 00A70502 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, s0, v8 ; 10121000 image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[24:27] ; F0800700 00C10A02 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mul_f32_e32 v2, v4, v7 ; 10040F04 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v10, s14, v0 ; D2820000 04001D0A v_mad_f32 v1, v11, s15, v1 ; D2820001 04041F0B v_mad_f32 v2, v12, s16, v2 ; D2820002 0408210C v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v9 ; 5E021302 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 240 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[4], IN[2].xxxx 2: MAD TEMP[1], CONST[5], IN[2].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[6], IN[2].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[7], IN[2].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: MUL TEMP[1], CONST[0], IN[0].xxxx 7: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 9: MAD TEMP[1], CONST[3], IN[0].wwww, TEMP[1] 10: MOV OUT[2], IN[1] 11: MOV OUT[0], TEMP[1] 12: MOV OUT[1], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %29, %57 %62 = fmul float %30, %57 %63 = fmul float %31, %58 %64 = fadd float %63, %61 %65 = fmul float %32, %58 %66 = fadd float %65, %62 %67 = fmul float %33, %59 %68 = fadd float %67, %64 %69 = fmul float %34, %59 %70 = fadd float %69, %66 %71 = fmul float %35, %60 %72 = fadd float %71, %68 %73 = fmul float %36, %60 %74 = fadd float %73, %70 %75 = fmul float %13, %41 %76 = fmul float %14, %41 %77 = fmul float %15, %41 %78 = fmul float %16, %41 %79 = fmul float %17, %42 %80 = fadd float %79, %75 %81 = fmul float %18, %42 %82 = fadd float %81, %76 %83 = fmul float %19, %42 %84 = fadd float %83, %77 %85 = fmul float %20, %42 %86 = fadd float %85, %78 %87 = fmul float %21, %43 %88 = fadd float %87, %80 %89 = fmul float %22, %43 %90 = fadd float %89, %82 %91 = fmul float %23, %43 %92 = fadd float %91, %84 %93 = fmul float %24, %43 %94 = fadd float %93, %86 %95 = fmul float %25, %44 %96 = fadd float %95, %88 %97 = fmul float %26, %44 %98 = fadd float %97, %90 %99 = fmul float %27, %44 %100 = fadd float %99, %92 %101 = fmul float %28, %44 %102 = fadd float %101, %94 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %72, float %74, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %98, float %100, float %102) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0xf ; C208010F s_buffer_load_dword s17, s[0:3], 0x10 ; C2088110 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_buffer_load_dword s6, s[0:3], 0x15 ; C2030115 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s9, s[0:3], 0x2 ; C2048102 s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103 s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 s_buffer_load_dword s14, s[0:3], 0x1c ; C207011C s_buffer_load_dword s15, s[0:3], 0x1d ; C207811D s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105 s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106 s_buffer_load_dword s20, s[0:3], 0x7 ; C20A0107 s_buffer_load_dword s21, s[0:3], 0x8 ; C20A8108 s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109 s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s7, v1 ; 10000207 v_mad_f32 v0, s11, v2, v0 ; D2820000 0402040B v_mul_f32_e32 v13, s17, v9 ; 101A1211 v_mad_f32 v13, s5, v10, v13 ; D282000D 04361405 v_mul_f32_e32 v14, s4, v9 ; 101C1204 v_mad_f32 v14, s6, v10, v14 ; D282000E 043A1406 v_mad_f32 v13, s12, v11, v13 ; D282000D 0436160C v_mad_f32 v14, s13, v11, v14 ; D282000E 043A160D v_mad_f32 v13, s14, v12, v13 ; D282000D 0436180E v_mad_f32 v9, s15, v12, v14 ; D2820009 043A180F v_mul_f32_e32 v10, s8, v1 ; 10140208 v_mad_f32 v10, s18, v2, v10 ; D282000A 042A0412 v_mul_f32_e32 v11, s9, v1 ; 10160209 v_mad_f32 v11, s19, v2, v11 ; D282000B 042E0413 v_mul_f32_e32 v12, s10, v1 ; 1018020A v_mad_f32 v12, s20, v2, v12 ; D282000C 04320414 v_mad_f32 v0, s21, v3, v0 ; D2820000 04020615 v_mad_f32 v10, s22, v3, v10 ; D282000A 042A0616 v_mad_f32 v11, s23, v3, v11 ; D282000B 042E0617 v_mad_f32 v12, s24, v3, v12 ; D282000C 04320618 v_mad_f32 v0, s25, v4, v0 ; D2820000 04020819 v_mad_f32 v10, s26, v4, v10 ; D282000A 042A081A v_mad_f32 v11, s0, v4, v11 ; D282000B 042E0800 v_mad_f32 v1, s16, v4, v12 ; D2820001 04320810 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v13, v9, v2, v2 ; F800020F 0202090D exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 exp 15, 12, 0, 1, 0, v0, v10, v11, v1 ; F80008CF 010B0A00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 348 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[1..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.5000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: FSLT TEMP[1].x, TEMP[0].wwww, IMM[0].xxxx 3: AND TEMP[2].x, TEMP[1].xxxx, IMM[0].yyyy 4: KILL_IF -TEMP[2].xxxx 5: MUL TEMP[2].x, TEMP[0].wwww, CONST[3].xxxx 6: MOV TEMP[1].w, TEMP[2].xxxx 7: MAD TEMP[2].xyz, IN[1].xyzz, CONST[2].xyzz, CONST[1].xyzz 8: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 9: MOV OUT[0], TEMP[1] 10: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %40 = bitcast float %35 to i32 %41 = bitcast float %36 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %32, <16 x i8> %34, i32 2) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = fcmp olt float %48, 5.000000e-01 %50 = select i1 %49, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %50) %51 = fmul float %48, %30 %52 = fmul float %37, %27 %53 = fadd float %52, %24 %54 = fmul float %38, %28 %55 = fadd float %54, %25 %56 = fmul float %39, %29 %57 = fadd float %56, %26 %58 = fmul float %45, %53 %59 = fmul float %46, %55 %60 = fmul float %47, %57 %61 = call i32 @llvm.SI.packf16(float %58, float %59) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %60, float %51) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s12, s[0:3], 0x6 ; C2060106 s_buffer_load_dword s13, s[0:3], 0x8 ; C2068108 s_buffer_load_dword s14, s[0:3], 0x9 ; C2070109 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s0, s[0:3], 0xa ; C200010A v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440002 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_gt_f32_e32 vcc, 0.5, v3 ; 7C0806F0 v_cndmask_b32_e64 v7, 0, -1.0, vcc ; D2000007 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v7 ; 7C260E80 v_mul_f32_e32 v7, s6, v3 ; 100E0606 v_mov_b32_e32 v8, s4 ; 7E100204 v_mad_f32 v4, s13, v4, v8 ; D2820004 0422080D v_mov_b32_e32 v8, s5 ; 7E100205 v_mad_f32 v5, s14, v5, v8 ; D2820005 04220A0E v_mov_b32_e32 v8, s12 ; 7E10020C v_mad_f32 v6, s0, v6, v8 ; D2820006 04220C00 v_mul_f32_e32 v4, v4, v0 ; 10080104 v_mul_f32_e32 v5, v5, v1 ; 100A0305 v_mul_f32_e32 v0, v6, v2 ; 10000506 v_cvt_pkrtz_f16_f32_e32 v1, v4, v5 ; 5E020B04 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[4], IN[1].xxxx 2: MAD TEMP[1], CONST[5], IN[1].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[6], IN[1].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[7], IN[1].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: MUL TEMP[1], CONST[0], IN[0].xxxx 7: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 9: MAD TEMP[1], CONST[3], IN[0].wwww, TEMP[1] 10: MOV OUT[0], TEMP[1] 11: MOV OUT[1], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %29, %49 %54 = fmul float %30, %49 %55 = fmul float %31, %50 %56 = fadd float %55, %53 %57 = fmul float %32, %50 %58 = fadd float %57, %54 %59 = fmul float %33, %51 %60 = fadd float %59, %56 %61 = fmul float %34, %51 %62 = fadd float %61, %58 %63 = fmul float %35, %52 %64 = fadd float %63, %60 %65 = fmul float %36, %52 %66 = fadd float %65, %62 %67 = fmul float %13, %41 %68 = fmul float %14, %41 %69 = fmul float %15, %41 %70 = fmul float %16, %41 %71 = fmul float %17, %42 %72 = fadd float %71, %67 %73 = fmul float %18, %42 %74 = fadd float %73, %68 %75 = fmul float %19, %42 %76 = fadd float %75, %69 %77 = fmul float %20, %42 %78 = fadd float %77, %70 %79 = fmul float %21, %43 %80 = fadd float %79, %72 %81 = fmul float %22, %43 %82 = fadd float %81, %74 %83 = fmul float %23, %43 %84 = fadd float %83, %76 %85 = fmul float %24, %43 %86 = fadd float %85, %78 %87 = fmul float %25, %44 %88 = fadd float %87, %80 %89 = fmul float %26, %44 %90 = fadd float %89, %82 %91 = fmul float %27, %44 %92 = fadd float %91, %84 %93 = fmul float %28, %44 %94 = fadd float %93, %86 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %64, float %66, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %88, float %90, float %92, float %94) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0xf ; C206010F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_buffer_load_dword s7, s[0:3], 0x15 ; C2038115 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104 s_buffer_load_dword s14, s[0:3], 0x5 ; C2070105 s_buffer_load_dword s15, s[0:3], 0x6 ; C2078106 s_buffer_load_dword s16, s[0:3], 0x7 ; C2080107 s_buffer_load_dword s17, s[0:3], 0x8 ; C2088108 s_buffer_load_dword s18, s[0:3], 0x9 ; C2090109 s_buffer_load_dword s19, s[0:3], 0x18 ; C2098118 s_buffer_load_dword s20, s[0:3], 0x19 ; C20A0119 s_buffer_load_dword s21, s[0:3], 0x1c ; C20A811C s_buffer_load_dword s22, s[0:3], 0x1d ; C20B011D s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s8, v1 ; 10000208 v_mul_f32_e32 v9, s4, v5 ; 10120A04 v_mad_f32 v9, s6, v6, v9 ; D2820009 04260C06 v_mul_f32_e32 v10, s5, v5 ; 10140A05 v_mad_f32 v10, s7, v6, v10 ; D282000A 042A0C07 v_mad_f32 v9, s19, v7, v9 ; D2820009 04260E13 v_mad_f32 v10, s20, v7, v10 ; D282000A 042A0E14 v_mad_f32 v9, s21, v8, v9 ; D2820009 04261015 v_mad_f32 v5, s22, v8, v10 ; D2820005 042A1016 v_mad_f32 v0, s13, v2, v0 ; D2820000 0402040D v_mul_f32_e32 v6, s9, v1 ; 100C0209 v_mad_f32 v6, s14, v2, v6 ; D2820006 041A040E v_mul_f32_e32 v7, s10, v1 ; 100E020A v_mad_f32 v7, s15, v2, v7 ; D2820007 041E040F v_mul_f32_e32 v8, s11, v1 ; 1010020B v_mad_f32 v8, s16, v2, v8 ; D2820008 04220410 v_mad_f32 v0, s17, v3, v0 ; D2820000 04020611 v_mad_f32 v6, s18, v3, v6 ; D2820006 041A0612 v_mad_f32 v7, s23, v3, v7 ; D2820007 041E0617 v_mad_f32 v8, s24, v3, v8 ; D2820008 04220618 v_mad_f32 v0, s25, v4, v0 ; D2820000 04020819 v_mad_f32 v6, s26, v4, v6 ; D2820006 041A081A v_mad_f32 v7, s0, v4, v7 ; D2820007 041E0800 v_mad_f32 v1, s12, v4, v8 ; D2820001 0422080C v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v9, v5, v2, v2 ; F800020F 02020509 exp 15, 12, 0, 1, 0, v0, v6, v7, v1 ; F80008CF 01070600 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 328 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[4..8] DCL TEMP[0..3], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[8].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D 6: MOV TEMP[3].xy, IN[0].xyyy 7: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D 8: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[4].xyzz 9: MAD TEMP[2].xyz, TEMP[2].xyzz, CONST[5].xyzz, TEMP[3].xyzz 10: ADD TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 11: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[6].xyzz 12: MOV TEMP[0].xy, IN[0].xyyy 13: TEX TEMP[0].xyz, TEMP[0], SAMP[1], 2D 14: MAD TEMP[1].xyz, TEMP[0].xyzz, CONST[7].xyzz, TEMP[1].xyzz 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %37 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, align 32, !tbaa !0 %39 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %42 = bitcast <8 x i32> addrspace(2)* %41 to <32 x i8> addrspace(2)* %43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %45 = bitcast <4 x i32> addrspace(2)* %44 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %48 = bitcast <8 x i32> addrspace(2)* %47 to <32 x i8> addrspace(2)* %49 = load <32 x i8>, <32 x i8> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %51 = bitcast <4 x i32> addrspace(2)* %50 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %61 = bitcast float %59 to i32 %62 = bitcast float %60 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %38, <16 x i8> %40, i32 2) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = extractelement <4 x float> %65, i32 3 %70 = fmul float %69, %36 %71 = bitcast float %59 to i32 %72 = bitcast float %60 to i32 %73 = insertelement <2 x i32> undef, i32 %71, i32 0 %74 = insertelement <2 x i32> %73, i32 %72, i32 1 %75 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %55, <16 x i8> %58, i32 2) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 1 %78 = extractelement <4 x float> %75, i32 2 %79 = bitcast float %59 to i32 %80 = bitcast float %60 to i32 %81 = insertelement <2 x i32> undef, i32 %79, i32 0 %82 = insertelement <2 x i32> %81, i32 %80, i32 1 %83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %82, <32 x i8> %49, <16 x i8> %52, i32 2) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = fmul float %84, %24 %88 = fmul float %85, %25 %89 = fmul float %86, %26 %90 = fmul float %76, %27 %91 = fadd float %90, %87 %92 = fmul float %77, %28 %93 = fadd float %92, %88 %94 = fmul float %78, %29 %95 = fadd float %94, %89 %96 = fadd float %66, %91 %97 = fadd float %67, %93 %98 = fadd float %68, %95 %99 = fmul float %96, %30 %100 = fmul float %97, %31 %101 = fmul float %98, %32 %102 = bitcast float %59 to i32 %103 = bitcast float %60 to i32 %104 = insertelement <2 x i32> undef, i32 %102, i32 0 %105 = insertelement <2 x i32> %104, i32 %103, i32 1 %106 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %105, <32 x i8> %43, <16 x i8> %46, i32 2) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 2 %110 = fmul float %107, %33 %111 = fadd float %110, %99 %112 = fmul float %108, %34 %113 = fadd float %112, %100 %114 = fmul float %109, %35 %115 = fadd float %114, %101 %116 = call i32 @llvm.SI.packf16(float %111, float %113) %117 = bitcast i32 %116 to float %118 = call i32 @llvm.SI.packf16(float %115, float %70) %119 = bitcast i32 %118 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %117, float %119, float %117, float %119) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s56, s[0:3], 0x15 ; C21C0115 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[20:23] ; F0800F00 00A80402 s_buffer_load_dword s20, s[0:3], 0x20 ; C20A0120 image_sample v[8:10], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[28:31] ; F0800700 00EC0802 image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[24:27] ; F0800700 00CA0B02 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430002 s_buffer_load_dword s8, s[0:3], 0x1d ; C204011D s_buffer_load_dword s9, s[0:3], 0x1e ; C204811E s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_buffer_load_dword s13, s[0:3], 0x1a ; C206811A s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mad_f32 v3, v11, s4, v4 ; D2820003 0410090B v_mad_f32 v14, v12, s5, v5 ; D282000E 04140B0C v_mad_f32 v11, v13, s6, v6 ; D282000B 04180D0D v_mul_f32_e32 v4, s20, v7 ; 10080E14 v_mad_f32 v3, v8, s7, v3 ; D2820003 040C0F08 v_mad_f32 v5, v9, s56, v14 ; D2820005 04387109 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v10, s10, v11 ; D2820006 042C150A v_mul_f32_e32 v3, s11, v3 ; 1006060B v_mul_f32_e32 v5, s12, v5 ; 100A0A0C v_mul_f32_e32 v6, s13, v6 ; 100C0C0D v_mad_f32 v3, v0, s0, v3 ; D2820003 040C0100 v_mad_f32 v5, v1, s8, v5 ; D2820005 04141101 v_mad_f32 v0, v2, s9, v6 ; D2820000 04181302 v_cvt_pkrtz_f16_f32_e32 v1, v3, v5 ; 5E020B03 v_cvt_pkrtz_f16_f32_e32 v0, v0, v4 ; 5E000900 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 16 Code Size: 264 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL CONST[0..9] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[6], IN[1].xxxx 2: MAD TEMP[1], CONST[7], IN[1].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[8], IN[1].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[9], IN[1].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: DP3 TEMP[1].x, CONST[5].xyzz, IN[2].xyzz 7: DP3 TEMP[2].x, CONST[5].xyzz, IN[3].xyzz 8: MOV TEMP[1].y, TEMP[2].xxxx 9: DP3 TEMP[2].x, CONST[5].xyzz, IN[4].xyzz 10: MOV TEMP[1].z, TEMP[2].xxxx 11: ADD TEMP[2].xyz, CONST[4].xyzz, -IN[0].xyzz 12: DP3 TEMP[3].x, TEMP[2].xyzz, IN[2].xyzz 13: DP3 TEMP[4].x, TEMP[2].xyzz, IN[3].xyzz 14: MOV TEMP[3].y, TEMP[4].xxxx 15: DP3 TEMP[2].x, TEMP[2].xyzz, IN[4].xyzz 16: MOV TEMP[3].z, TEMP[2].xxxx 17: MOV TEMP[3].w, IMM[0].xxxx 18: MOV TEMP[2].w, IMM[0].xxxx 19: MOV TEMP[2].xyz, IN[2].xyzx 20: MOV TEMP[4].w, IMM[0].xxxx 21: MOV TEMP[4].xyz, IN[3].xyzx 22: MOV TEMP[5].w, IMM[0].xxxx 23: MOV TEMP[5].xyz, IN[4].xyzx 24: MUL TEMP[6], CONST[0], IN[0].xxxx 25: MAD TEMP[6], CONST[1], IN[0].yyyy, TEMP[6] 26: MAD TEMP[6], CONST[2], IN[0].zzzz, TEMP[6] 27: MAD TEMP[6], CONST[3], IN[0].wwww, TEMP[6] 28: MOV TEMP[1].xyz, TEMP[1].xyzx 29: MOV OUT[6], TEMP[1] 30: MOV OUT[3], TEMP[5] 31: MOV OUT[4], TEMP[2] 32: MOV OUT[0], TEMP[6] 33: MOV OUT[5], TEMP[4] 34: MOV OUT[1], TEMP[3] 35: MOV OUT[2], TEMP[0] 36: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = add i32 %5, %7 %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %78) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = fmul float %35, %58 %84 = fmul float %36, %58 %85 = fmul float %37, %58 %86 = fmul float %38, %59 %87 = fadd float %86, %83 %88 = fmul float %39, %59 %89 = fadd float %88, %84 %90 = fmul float %40, %59 %91 = fadd float %90, %85 %92 = fmul float %41, %60 %93 = fadd float %92, %87 %94 = fmul float %42, %60 %95 = fadd float %94, %89 %96 = fmul float %43, %60 %97 = fadd float %96, %91 %98 = fmul float %44, %61 %99 = fadd float %98, %93 %100 = fmul float %45, %61 %101 = fadd float %100, %95 %102 = fmul float %32, %66 %103 = fmul float %33, %67 %104 = fadd float %103, %102 %105 = fmul float %34, %68 %106 = fadd float %104, %105 %107 = fmul float %32, %73 %108 = fmul float %33, %74 %109 = fadd float %108, %107 %110 = fmul float %34, %75 %111 = fadd float %109, %110 %112 = fmul float %32, %80 %113 = fmul float %33, %81 %114 = fadd float %113, %112 %115 = fmul float %34, %82 %116 = fadd float %114, %115 %117 = fsub float %29, %50 %118 = fsub float %30, %51 %119 = fsub float %31, %52 %120 = fmul float %117, %66 %121 = fmul float %118, %67 %122 = fadd float %121, %120 %123 = fmul float %119, %68 %124 = fadd float %122, %123 %125 = fmul float %117, %73 %126 = fmul float %118, %74 %127 = fadd float %126, %125 %128 = fmul float %119, %75 %129 = fadd float %127, %128 %130 = fmul float %117, %80 %131 = fmul float %118, %81 %132 = fadd float %131, %130 %133 = fmul float %119, %82 %134 = fadd float %132, %133 %135 = fmul float %13, %50 %136 = fmul float %14, %50 %137 = fmul float %15, %50 %138 = fmul float %16, %50 %139 = fmul float %17, %51 %140 = fadd float %139, %135 %141 = fmul float %18, %51 %142 = fadd float %141, %136 %143 = fmul float %19, %51 %144 = fadd float %143, %137 %145 = fmul float %20, %51 %146 = fadd float %145, %138 %147 = fmul float %21, %52 %148 = fadd float %147, %140 %149 = fmul float %22, %52 %150 = fadd float %149, %142 %151 = fmul float %23, %52 %152 = fadd float %151, %144 %153 = fmul float %24, %52 %154 = fadd float %153, %146 %155 = fmul float %25, %53 %156 = fadd float %155, %148 %157 = fmul float %26, %53 %158 = fadd float %157, %150 %159 = fmul float %27, %53 %160 = fadd float %159, %152 %161 = fmul float %28, %53 %162 = fadd float %161, %154 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %124, float %129, float %134, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %99, float %101, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %80, float %81, float %82, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %66, float %67, float %68, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %73, float %74, float %75, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %106, float %111, float %116, float %97) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %156, float %158, float %160, float %162) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[18:21], v0, s[8:11], 0 idxen ; E00C2000 80021200 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1b ; C206011B s_buffer_load_dword s13, s[0:3], 0x1c ; C206811C s_buffer_load_dword s14, s[0:3], 0x1d ; C207011D s_buffer_load_dword s15, s[0:3], 0x1f ; C207811F s_buffer_load_dword s16, s[0:3], 0x20 ; C2080120 s_buffer_load_dword s17, s[0:3], 0x21 ; C2088121 s_buffer_load_dword s18, s[0:3], 0x23 ; C2090123 s_buffer_load_dword s19, s[0:3], 0x24 ; C2098124 s_buffer_load_dword s20, s[0:3], 0x25 ; C20A0125 s_buffer_load_dword s21, s[0:3], 0x0 ; C20A8100 s_buffer_load_dword s22, s[0:3], 0x1 ; C20B0101 s_buffer_load_dword s23, s[0:3], 0x2 ; C20B8102 s_buffer_load_dword s25, s[0:3], 0x3 ; C20C8103 s_buffer_load_dword s26, s[0:3], 0x4 ; C20D0104 s_buffer_load_dword s27, s[0:3], 0x5 ; C20D8105 s_buffer_load_dword s28, s[0:3], 0x6 ; C20E0106 s_buffer_load_dword s29, s[0:3], 0x7 ; C20E8107 s_buffer_load_dword s30, s[0:3], 0x8 ; C20F0108 s_buffer_load_dword s31, s[0:3], 0x9 ; C20F8109 s_buffer_load_dword s32, s[0:3], 0xa ; C210010A s_buffer_load_dword s33, s[0:3], 0xb ; C210810B s_buffer_load_dword s34, s[0:3], 0xc ; C211010C s_buffer_load_dword s35, s[0:3], 0xd ; C211810D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s21, v2 ; 10000415 v_mul_f32_e32 v22, s10, v6 ; 102C0C0A v_mad_f32 v22, s13, v7, v22 ; D2820016 045A0E0D v_mul_f32_e32 v23, s11, v6 ; 102E0C0B v_mad_f32 v23, s14, v7, v23 ; D2820017 045E0E0E v_mul_f32_e32 v24, s12, v6 ; 10300C0C v_mad_f32 v24, s15, v7, v24 ; D2820018 04620E0F v_mad_f32 v22, s16, v8, v22 ; D2820016 045A1010 v_mad_f32 v23, s17, v8, v23 ; D2820017 045E1011 v_mad_f32 v24, s18, v8, v24 ; D2820018 04621012 v_mad_f32 v22, s19, v9, v22 ; D2820016 045A1213 v_mad_f32 v6, s20, v9, v23 ; D2820006 045E1214 v_mad_f32 v0, s26, v3, v0 ; D2820000 0402061A v_mul_f32_e32 v7, s22, v2 ; 100E0416 v_mad_f32 v7, s27, v3, v7 ; D2820007 041E061B v_mul_f32_e32 v8, s23, v2 ; 10100417 v_mad_f32 v8, s28, v3, v8 ; D2820008 0422061C v_mul_f32_e32 v9, s25, v2 ; 10120419 v_mad_f32 v9, s29, v3, v9 ; D2820009 0426061D v_mad_f32 v0, s30, v4, v0 ; D2820000 0402081E v_mad_f32 v7, s31, v4, v7 ; D2820007 041E081F v_mad_f32 v8, s32, v4, v8 ; D2820008 04220820 v_mad_f32 v9, s33, v4, v9 ; D2820009 04260821 v_mad_f32 v0, s34, v5, v0 ; D2820000 04020A22 v_mad_f32 v7, s35, v5, v7 ; D2820007 041E0A23 v_mad_f32 v8, s0, v5, v8 ; D2820008 04220A00 v_mad_f32 v9, s24, v5, v9 ; D2820009 04260A18 v_sub_f32_e32 v23, s4, v2 ; 082E0404 v_sub_f32_e32 v25, s5, v3 ; 08320605 v_sub_f32_e32 v2, s6, v4 ; 08040806 v_mul_f32_e32 v3, v10, v23 ; 10062F0A v_mul_f32_e32 v4, v14, v23 ; 10082F0E v_mul_f32_e32 v5, v18, v23 ; 100A2F12 v_mad_f32 v3, v25, v11, v3 ; D2820003 040E1719 v_mad_f32 v4, v25, v15, v4 ; D2820004 04121F19 v_mad_f32 v5, v25, v19, v5 ; D2820005 04162719 v_mad_f32 v3, v2, v12, v3 ; D2820003 040E1902 v_mad_f32 v4, v2, v16, v4 ; D2820004 04122102 v_mad_f32 v2, v2, v20, v5 ; D2820002 04162902 exp 15, 32, 0, 0, 0, v3, v4, v2, v1 ; F800020F 01020403 exp 15, 33, 0, 0, 0, v22, v6, v1, v1 ; F800021F 01010616 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s7, v18 ; 10042407 v_mad_f32 v2, s8, v19, v2 ; D2820002 040A2608 v_mad_f32 v2, s9, v20, v2 ; D2820002 040A2809 exp 15, 34, 0, 0, 0, v18, v19, v20, v1 ; F800022F 01141312 v_mul_f32_e32 v3, s7, v10 ; 10061407 v_mul_f32_e32 v4, s7, v14 ; 10081C07 v_mad_f32 v3, s8, v11, v3 ; D2820003 040E1608 v_mad_f32 v4, s8, v15, v4 ; D2820004 04121E08 v_mad_f32 v3, s9, v12, v3 ; D2820003 040E1809 v_mad_f32 v4, s9, v16, v4 ; D2820004 04122009 exp 15, 35, 0, 0, 0, v10, v11, v12, v1 ; F800023F 010C0B0A exp 15, 36, 0, 0, 0, v14, v15, v16, v1 ; F800024F 01100F0E exp 15, 37, 0, 0, 0, v3, v4, v2, v24 ; F800025F 18020403 exp 15, 12, 0, 1, 0, v0, v7, v8, v9 ; F80008CF 09080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 28 Code Size: 596 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL CONST[5..14] DCL CONST[17] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { -0.5000, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[10].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MOV TEMP[2].xy, IN[1].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[4], 2D 6: MOV TEMP[3].xy, IN[1].xyyy 7: TEX TEMP[3].xyz, TEMP[3], SAMP[3], 2D 8: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[5].xyzz 9: MAD TEMP[2].xyz, TEMP[2].xyzz, CONST[6].xyzz, TEMP[3].xyzz 10: ADD TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 11: MOV TEMP[0].xy, IN[1].xyyy 12: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 13: ADD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx 14: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 15: RSQ TEMP[2].x, TEMP[2].xxxx 16: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 17: MOV TEMP[2].xyz, -IN[0].xyzx 18: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[2].xyzz 19: MUL TEMP[3].xyz, TEMP[3].xxxx, TEMP[0].xyzz 20: MUL TEMP[3].xyz, IMM[0].yyyy, TEMP[3].xyzz 21: ADD TEMP[2].xyz, TEMP[2].xyzz, -TEMP[3].xyzz 22: MUL TEMP[3].xyz, TEMP[2].xxxx, IN[3].xyzz 23: MAD TEMP[3].xyz, TEMP[2].yyyy, IN[4].xyzz, TEMP[3].xyzz 24: MAD TEMP[2].xyz, TEMP[2].zzzz, IN[2].xyzz, TEMP[3].xyzz 25: MOV TEMP[3].xy, IN[1].xyyy 26: TEX TEMP[3].xyz, TEMP[3], SAMP[5], 2D 27: MUL TEMP[4], CONST[11], TEMP[2].xxxx 28: MAD TEMP[4], CONST[12], TEMP[2].yyyy, TEMP[4] 29: MAD TEMP[2].xyz, CONST[13], TEMP[2].zzzz, TEMP[4] 30: MOV TEMP[2].xyz, TEMP[2].xyzz 31: TEX TEMP[2].xyz, TEMP[2], SAMP[6], CUBE 32: MAD TEMP[2].xyz, TEMP[3].xyzz, TEMP[2].xyzz, TEMP[1].xyzz 33: DP3 TEMP[3].x, IN[5].xyzz, IN[5].xyzz 34: RSQ TEMP[3].x, TEMP[3].xxxx 35: MUL TEMP[3].xyz, IN[5].xyzz, TEMP[3].xxxx 36: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[3].xyzz 37: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz 38: MUL TEMP[0].xyz, CONST[8].xyzz, TEMP[0].xxxx 39: MAD TEMP[0].xyz, TEMP[0].xyzz, CONST[17].xyzz, CONST[7].xyzz 40: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[0].xyzz 41: MOV TEMP[0].xy, IN[1].xyyy 42: TEX TEMP[0].xyz, TEMP[0], SAMP[2], 2D 43: MAD TEMP[1].xyz, TEMP[0].xyzz, CONST[9].xyzz, TEMP[1].xyzz 44: MOV OUT[0], TEMP[1] 45: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %52 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0 %54 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %57 = bitcast <8 x i32> addrspace(2)* %56 to <32 x i8> addrspace(2)* %58 = load <32 x i8>, <32 x i8> addrspace(2)* %57, align 32, !tbaa !0 %59 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %60 = bitcast <4 x i32> addrspace(2)* %59 to <16 x i8> addrspace(2)* %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)* %64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)* %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %69 = bitcast <8 x i32> addrspace(2)* %68 to <32 x i8> addrspace(2)* %70 = load <32 x i8>, <32 x i8> addrspace(2)* %69, align 32, !tbaa !0 %71 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %72 = bitcast <4 x i32> addrspace(2)* %71 to <16 x i8> addrspace(2)* %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %75 = bitcast <8 x i32> addrspace(2)* %74 to <32 x i8> addrspace(2)* %76 = load <32 x i8>, <32 x i8> addrspace(2)* %75, align 32, !tbaa !0 %77 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %78 = bitcast <4 x i32> addrspace(2)* %77 to <16 x i8> addrspace(2)* %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !tbaa !0 %80 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %81 = bitcast <8 x i32> addrspace(2)* %80 to <32 x i8> addrspace(2)* %82 = load <32 x i8>, <32 x i8> addrspace(2)* %81, align 32, !tbaa !0 %83 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %84 = bitcast <4 x i32> addrspace(2)* %83 to <16 x i8> addrspace(2)* %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0 %86 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %87 = bitcast <8 x i32> addrspace(2)* %86 to <32 x i8> addrspace(2)* %88 = load <32 x i8>, <32 x i8> addrspace(2)* %87, align 32, !tbaa !0 %89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %90 = bitcast <4 x i32> addrspace(2)* %89 to <16 x i8> addrspace(2)* %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %109 = bitcast float %95 to i32 %110 = bitcast float %96 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %58, <16 x i8> %61, i32 2) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = fmul float %117, %39 %119 = bitcast float %95 to i32 %120 = bitcast float %96 to i32 %121 = insertelement <2 x i32> undef, i32 %119, i32 0 %122 = insertelement <2 x i32> %121, i32 %120, i32 1 %123 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %122, <32 x i8> %76, <16 x i8> %79, i32 2) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 1 %126 = extractelement <4 x float> %123, i32 2 %127 = bitcast float %95 to i32 %128 = bitcast float %96 to i32 %129 = insertelement <2 x i32> undef, i32 %127, i32 0 %130 = insertelement <2 x i32> %129, i32 %128, i32 1 %131 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %130, <32 x i8> %70, <16 x i8> %73, i32 2) %132 = extractelement <4 x float> %131, i32 0 %133 = extractelement <4 x float> %131, i32 1 %134 = extractelement <4 x float> %131, i32 2 %135 = fmul float %132, %24 %136 = fmul float %133, %25 %137 = fmul float %134, %26 %138 = fmul float %124, %27 %139 = fadd float %138, %135 %140 = fmul float %125, %28 %141 = fadd float %140, %136 %142 = fmul float %126, %29 %143 = fadd float %142, %137 %144 = fadd float %114, %139 %145 = fadd float %115, %141 %146 = fadd float %116, %143 %147 = bitcast float %95 to i32 %148 = bitcast float %96 to i32 %149 = insertelement <2 x i32> undef, i32 %147, i32 0 %150 = insertelement <2 x i32> %149, i32 %148, i32 1 %151 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %150, <32 x i8> %53, <16 x i8> %55, i32 2) %152 = extractelement <4 x float> %151, i32 0 %153 = extractelement <4 x float> %151, i32 1 %154 = extractelement <4 x float> %151, i32 2 %155 = fadd float %152, -5.000000e-01 %156 = fadd float %153, -5.000000e-01 %157 = fadd float %154, -5.000000e-01 %158 = fmul float %155, %155 %159 = fmul float %156, %156 %160 = fadd float %159, %158 %161 = fmul float %157, %157 %162 = fadd float %160, %161 %163 = call float @llvm.AMDGPU.rsq.clamped.f32(float %162) %164 = fmul float %155, %163 %165 = fmul float %156, %163 %166 = fmul float %157, %163 %167 = fmul float %92, %164 %168 = fsub float -0.000000e+00, %167 %169 = fmul float %93, %165 %170 = fsub float %168, %169 %171 = fmul float %94, %166 %172 = fsub float %170, %171 %173 = fmul float %172, %164 %174 = fmul float %172, %165 %175 = fmul float %172, %166 %176 = fmul float %173, 2.000000e+00 %177 = fmul float %174, 2.000000e+00 %178 = fmul float %175, 2.000000e+00 %179 = fsub float -0.000000e+00, %176 %180 = fsub float %179, %92 %181 = fsub float -0.000000e+00, %177 %182 = fsub float %181, %93 %183 = fsub float -0.000000e+00, %178 %184 = fsub float %183, %94 %185 = fmul float %180, %100 %186 = fmul float %180, %101 %187 = fmul float %180, %102 %188 = fmul float %182, %103 %189 = fadd float %188, %185 %190 = fmul float %182, %104 %191 = fadd float %190, %186 %192 = fmul float %182, %105 %193 = fadd float %192, %187 %194 = fmul float %184, %97 %195 = fadd float %194, %189 %196 = fmul float %184, %98 %197 = fadd float %196, %191 %198 = fmul float %184, %99 %199 = fadd float %198, %193 %200 = bitcast float %95 to i32 %201 = bitcast float %96 to i32 %202 = insertelement <2 x i32> undef, i32 %200, i32 0 %203 = insertelement <2 x i32> %202, i32 %201, i32 1 %204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %203, <32 x i8> %82, <16 x i8> %85, i32 2) %205 = extractelement <4 x float> %204, i32 0 %206 = extractelement <4 x float> %204, i32 1 %207 = extractelement <4 x float> %204, i32 2 %208 = fmul float %40, %195 %209 = fmul float %41, %195 %210 = fmul float %42, %195 %211 = fmul float %43, %197 %212 = fadd float %211, %208 %213 = fmul float %44, %197 %214 = fadd float %213, %209 %215 = fmul float %45, %197 %216 = fadd float %215, %210 %217 = fmul float %46, %199 %218 = fadd float %217, %212 %219 = fmul float %47, %199 %220 = fadd float %219, %214 %221 = fmul float %48, %199 %222 = fadd float %221, %216 %223 = insertelement <4 x float> undef, float %218, i32 0 %224 = insertelement <4 x float> %223, float %220, i32 1 %225 = insertelement <4 x float> %224, float %222, i32 2 %226 = insertelement <4 x float> %225, float 0.000000e+00, i32 3 %227 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %226) %228 = extractelement <4 x float> %227, i32 0 %229 = extractelement <4 x float> %227, i32 1 %230 = extractelement <4 x float> %227, i32 2 %231 = extractelement <4 x float> %227, i32 3 %232 = call float @fabs(float %230) %233 = fdiv float 1.000000e+00, %232 %234 = fmul float %228, %233 %235 = fadd float %234, 1.500000e+00 %236 = fmul float %229, %233 %237 = fadd float %236, 1.500000e+00 %238 = bitcast float %237 to i32 %239 = bitcast float %235 to i32 %240 = bitcast float %231 to i32 %241 = insertelement <4 x i32> undef, i32 %238, i32 0 %242 = insertelement <4 x i32> %241, i32 %239, i32 1 %243 = insertelement <4 x i32> %242, i32 %240, i32 2 %244 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %243, <32 x i8> %88, <16 x i8> %91, i32 4) %245 = extractelement <4 x float> %244, i32 0 %246 = extractelement <4 x float> %244, i32 1 %247 = extractelement <4 x float> %244, i32 2 %248 = fmul float %205, %245 %249 = fadd float %248, %144 %250 = fmul float %206, %246 %251 = fadd float %250, %145 %252 = fmul float %207, %247 %253 = fadd float %252, %146 %254 = fmul float %106, %106 %255 = fmul float %107, %107 %256 = fadd float %255, %254 %257 = fmul float %108, %108 %258 = fadd float %256, %257 %259 = call float @llvm.AMDGPU.rsq.clamped.f32(float %258) %260 = fmul float %106, %259 %261 = fmul float %107, %259 %262 = fmul float %108, %259 %263 = fmul float %164, %260 %264 = fmul float %165, %261 %265 = fadd float %264, %263 %266 = fmul float %166, %262 %267 = fadd float %265, %266 %268 = call float @llvm.maxnum.f32(float %267, float 0.000000e+00) %269 = fmul float %33, %268 %270 = fmul float %34, %268 %271 = fmul float %35, %268 %272 = fmul float %269, %49 %273 = fadd float %272, %30 %274 = fmul float %270, %50 %275 = fadd float %274, %31 %276 = fmul float %271, %51 %277 = fadd float %276, %32 %278 = fmul float %249, %273 %279 = fmul float %251, %275 %280 = fmul float %253, %277 %281 = bitcast float %95 to i32 %282 = bitcast float %96 to i32 %283 = insertelement <2 x i32> undef, i32 %281, i32 0 %284 = insertelement <2 x i32> %283, i32 %282, i32 1 %285 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %284, <32 x i8> %64, <16 x i8> %67, i32 2) %286 = extractelement <4 x float> %285, i32 0 %287 = extractelement <4 x float> %285, i32 1 %288 = extractelement <4 x float> %285, i32 2 %289 = fmul float %286, %36 %290 = fadd float %289, %278 %291 = fmul float %287, %37 %292 = fadd float %291, %279 %293 = fmul float %288, %38 %294 = fadd float %293, %280 %295 = call i32 @llvm.SI.packf16(float %290, float %292) %296 = bitcast i32 %295 to float %297 = call i32 @llvm.SI.packf16(float %294, float %118) %298 = bitcast i32 %297 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %296, float %298, float %296, float %298) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s68, s[0:3], 0x31 ; C2220131 s_buffer_load_dword s69, s[0:3], 0x32 ; C2228132 s_buffer_load_dword s70, s[0:3], 0x34 ; C2230134 s_buffer_load_dword s71, s[0:3], 0x35 ; C2238135 s_buffer_load_dword s72, s[0:3], 0x36 ; C2240136 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 0, 3, [m0] ; C8280C00 v_interp_p2_f32 v10, [v10], v1, 0, 3, [m0] ; C8290C01 v_interp_p1_f32 v11, v0, 1, 3, [m0] ; C82C0D00 v_interp_p2_f32 v11, [v11], v1, 1, 3, [m0] ; C82D0D01 v_interp_p1_f32 v12, v0, 2, 3, [m0] ; C8300E00 v_interp_p2_f32 v12, [v12], v1, 2, 3, [m0] ; C8310E01 v_interp_p1_f32 v13, v0, 0, 4, [m0] ; C8341000 v_interp_p2_f32 v13, [v13], v1, 0, 4, [m0] ; C8351001 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v15, v0, 2, 4, [m0] ; C83C1200 v_interp_p2_f32 v15, [v15], v1, 2, 4, [m0] ; C83D1201 v_interp_p1_f32 v16, v0, 0, 5, [m0] ; C8401400 v_interp_p2_f32 v16, [v16], v1, 0, 5, [m0] ; C8411401 v_interp_p1_f32 v17, v0, 1, 5, [m0] ; C8441500 v_interp_p2_f32 v17, [v17], v1, 1, 5, [m0] ; C8451501 v_interp_p1_f32 v18, v0, 2, 5, [m0] ; C8481600 v_interp_p2_f32 v18, [v18], v1, 2, 5, [m0] ; C8491601 s_load_dwordx4 s[40:43], s[4:5], 0x4 ; C0940504 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx4 s[28:31], s[4:5], 0x10 ; C08E0510 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx8 s[60:67], s[6:7], 0x8 ; C0DE0708 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 s_load_dwordx8 s[44:51], s[6:7], 0x18 ; C0D60718 s_load_dwordx8 s[52:59], s[6:7], 0x20 ; C0DA0720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[60:67], s[40:43] ; F0800F00 014F1305 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[52:59], s[28:31] ; F0800700 00ED1705 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[44:51], s[24:27] ; F0800700 00CB1A05 image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[32:39], s[20:23] ; F0800700 00A81D05 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, -0.5, v29 ; 06003AF1 v_add_f32_e32 v1, -0.5, v30 ; 06023CF1 v_add_f32_e32 v29, -0.5, v31 ; 063A3EF1 v_mul_f32_e32 v30, v0, v0 ; 103C0100 v_mad_f32 v30, v1, v1, v30 ; D282001E 047A0301 v_mad_f32 v30, v29, v29, v30 ; D282001E 047A3B1D v_rsq_clamp_f32_e32 v30, v30 ; 7E3C591E v_mul_f32_e32 v0, v30, v0 ; 1000011E v_mul_f32_e32 v1, v30, v1 ; 1002031E v_mul_f32_e32 v29, v30, v29 ; 103A3B1E v_mul_f32_e32 v30, v0, v2 ; 103C0500 v_mad_f32 v30, -v3, v1, -v30 ; D282001E A47A0303 v_mad_f32 v30, -v4, v29, v30 ; D282001E 247A3B04 v_mul_f32_e32 v31, v0, v30 ; 103E3D00 v_mad_f32 v31, v30, v0, v31 ; D282001F 047E011E v_sub_f32_e64 v2, -v31, v2 ; D2080002 2002051F v_mul_f32_e32 v31, v1, v30 ; 103E3D01 v_mad_f32 v31, v30, v1, v31 ; D282001F 047E031E v_sub_f32_e64 v3, -v31, v3 ; D2080003 2002071F v_mul_f32_e32 v10, v10, v2 ; 1014050A v_mul_f32_e32 v11, v11, v2 ; 1016050B v_mul_f32_e32 v2, v12, v2 ; 1004050C v_mad_f32 v10, v3, v13, v10 ; D282000A 042A1B03 v_mad_f32 v11, v3, v14, v11 ; D282000B 042E1D03 v_mad_f32 v2, v3, v15, v2 ; D2820002 040A1F03 s_buffer_load_dword s20, s[0:3], 0x2c ; C20A012C s_buffer_load_dword s21, s[0:3], 0x2d ; C20A812D s_buffer_load_dword s22, s[0:3], 0x2e ; C20B012E s_buffer_load_dword s23, s[0:3], 0x30 ; C20B8130 v_mul_f32_e32 v3, v29, v30 ; 10063D1D v_mad_f32 v3, v30, v29, v3 ; D2820003 040E3B1E v_sub_f32_e64 v3, -v3, v4 ; D2080003 20020903 v_mad_f32 v4, v3, v7, v10 ; D2820004 042A0F03 v_mad_f32 v7, v3, v8, v11 ; D2820007 042E1103 v_mad_f32 v2, v3, v9, v2 ; D2820002 040A1303 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s20, v4 ; 10060814 v_mul_f32_e32 v8, s21, v4 ; 10100815 v_mul_f32_e32 v4, s22, v4 ; 10080816 v_mad_f32 v3, s23, v7, v3 ; D2820003 040E0E17 v_mad_f32 v8, s68, v7, v8 ; D2820008 04220E44 v_mad_f32 v4, s69, v7, v4 ; D2820004 04120E45 v_mad_f32 v9, s70, v2, v3 ; D2820009 040E0446 v_mad_f32 v10, s71, v2, v8 ; D282000A 04220447 v_mad_f32 v11, s72, v2, v4 ; D282000B 04120448 v_mov_b32_e32 v12, 0 ; 7E180280 v_cubeid_f32 v33, v9, v10, v11 ; D2880021 042E1509 v_cubema_f32 v32, v9, v10, v11 ; D28E0020 042E1509 s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514 s_load_dwordx8 s[24:31], s[6:7], 0x28 ; C0CC0728 v_cubesc_f32 v31, v9, v10, v11 ; D28A001F 042E1509 v_cubetc_f32 v30, v9, v10, v11 ; D28C001E 042E1509 s_buffer_load_dword s32, s[0:3], 0x14 ; C2100114 s_load_dwordx4 s[36:39], s[4:5], 0x18 ; C0920518 s_load_dwordx8 s[40:47], s[6:7], 0x30 ; C0D40730 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_buffer_load_dword s5, s[0:3], 0x16 ; C2028116 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 v_rcp_f32_e64 v2, |v32| ; D3540102 00000120 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[24:31], s[20:23] ; F0800700 00A60705 s_buffer_load_dword s7, s[0:3], 0x19 ; C2038119 v_mov_b32_e32 v3, 0x3fc00000 ; 7E0602FF 3FC00000 v_mad_f32 v32, v30, v2, v3 ; D2820020 040E051E v_mad_f32 v31, v31, v2, v3 ; D282001F 040E051F image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[40:47], s[36:39] ; F0800700 012A021F s_buffer_load_dword s20, s[0:3], 0x1a ; C20A011A; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = add i32 %5, %7 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = fmul float %32, %55 %81 = fmul float %33, %55 %82 = fmul float %34, %55 %83 = fmul float %35, %56 %84 = fadd float %83, %80 %85 = fmul float %36, %56 %86 = fadd float %85, %81 %87 = fmul float %37, %56 %88 = fadd float %87, %82 %89 = fmul float %38, %57 %90 = fadd float %89, %84 %91 = fmul float %39, %57 %92 = fadd float %91, %86 %93 = fmul float %40, %57 %94 = fadd float %93, %88 %95 = fmul float %41, %58 %96 = fadd float %95, %90 %97 = fmul float %42, %58 %98 = fadd float %97, %92 %99 = fmul float %29, %63 %100 = fmul float %30, %64 %101 = fadd float %100, %99 %102 = fmul float %31, %65 %103 = fadd float %101, %102 %104 = fmul float %29, %70 %105 = fmul float %30, %71 %106 = fadd float %105, %104 %107 = fmul float %31, %72 %108 = fadd float %106, %107 %109 = fmul float %29, %77 %110 = fmul float %30, %78 %111 = fadd float %110, %109 %112 = fmul float %31, %79 %113 = fadd float %111, %112 %114 = fmul float %13, %47 %115 = fmul float %14, %47 %116 = fmul float %15, %47 %117 = fmul float %16, %47 %118 = fmul float %17, %48 %119 = fadd float %118, %114 %120 = fmul float %18, %48 %121 = fadd float %120, %115 %122 = fmul float %19, %48 %123 = fadd float %122, %116 %124 = fmul float %20, %48 %125 = fadd float %124, %117 %126 = fmul float %21, %49 %127 = fadd float %126, %119 %128 = fmul float %22, %49 %129 = fadd float %128, %121 %130 = fmul float %23, %49 %131 = fadd float %130, %123 %132 = fmul float %24, %49 %133 = fadd float %132, %125 %134 = fmul float %25, %50 %135 = fadd float %134, %127 %136 = fmul float %26, %50 %137 = fadd float %136, %129 %138 = fmul float %27, %50 %139 = fadd float %138, %131 %140 = fmul float %28, %50 %141 = fadd float %140, %133 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %96, float %98, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %103, float %108, float %113, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %135, float %137, float %139, float %141) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[18:21], v0, s[8:11], 0 idxen ; E00C2000 80021200 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1b ; C206011B s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x1c ; C209011C s_buffer_load_dword s19, s[0:3], 0x1d ; C209811D s_buffer_load_dword s20, s[0:3], 0x1f ; C20A011F s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 s_buffer_load_dword s23, s[0:3], 0x5 ; C20B8105 s_buffer_load_dword s25, s[0:3], 0x6 ; C20C8106 s_buffer_load_dword s26, s[0:3], 0x7 ; C20D0107 s_buffer_load_dword s27, s[0:3], 0x8 ; C20D8108 s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109 s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A s_buffer_load_dword s30, s[0:3], 0xb ; C20F010B s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C s_buffer_load_dword s32, s[0:3], 0xd ; C210010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s13, v2 ; 1000040D v_mul_f32_e32 v22, s7, v6 ; 102C0C07 v_mul_f32_e32 v23, s4, v10 ; 102E1404 v_mad_f32 v23, s5, v11, v23 ; D2820017 045E1605 v_mad_f32 v10, s6, v12, v23 ; D282000A 045E1806 v_mul_f32_e32 v11, s4, v14 ; 10161C04 v_mad_f32 v11, s5, v15, v11 ; D282000B 042E1E05 v_mad_f32 v11, s6, v16, v11 ; D282000B 042E2006 v_mul_f32_e32 v12, s4, v18 ; 10182404 v_mad_f32 v12, s5, v19, v12 ; D282000C 04322605 v_mad_f32 v12, s6, v20, v12 ; D282000C 04322806 v_mad_f32 v13, s10, v7, v22 ; D282000D 045A0E0A v_mul_f32_e32 v14, s8, v6 ; 101C0C08 v_mad_f32 v14, s11, v7, v14 ; D282000E 043A0E0B v_mul_f32_e32 v15, s9, v6 ; 101E0C09 v_mad_f32 v15, s12, v7, v15 ; D282000F 043E0E0C v_mad_f32 v13, s18, v8, v13 ; D282000D 04361012 v_mad_f32 v14, s19, v8, v14 ; D282000E 043A1013 v_mad_f32 v15, s20, v8, v15 ; D282000F 043E1014 v_mad_f32 v13, s21, v9, v13 ; D282000D 04361215 v_mad_f32 v6, s22, v9, v14 ; D2820006 043A1216; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %40 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %41 = load <32 x i8>, <32 x i8> addrspace(2)* %40, align 32, !tbaa !0 %42 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 %44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %45 = bitcast <8 x i32> addrspace(2)* %44 to <32 x i8> addrspace(2)* %46 = load <32 x i8>, <32 x i8> addrspace(2)* %45, align 32, !tbaa !0 %47 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %48 = bitcast <4 x i32> addrspace(2)* %47 to <16 x i8> addrspace(2)* %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %51 = bitcast <8 x i32> addrspace(2)* %50 to <32 x i8> addrspace(2)* %52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %54 = bitcast <4 x i32> addrspace(2)* %53 to <16 x i8> addrspace(2)* %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %57 = bitcast <8 x i32> addrspace(2)* %56 to <32 x i8> addrspace(2)* %58 = load <32 x i8>, <32 x i8> addrspace(2)* %57, align 32, !tbaa !0 %59 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %60 = bitcast <4 x i32> addrspace(2)* %59 to <16 x i8> addrspace(2)* %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %67 = bitcast float %62 to i32 %68 = bitcast float %63 to i32 %69 = insertelement <2 x i32> undef, i32 %67, i32 0 %70 = insertelement <2 x i32> %69, i32 %68, i32 1 %71 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %70, <32 x i8> %46, <16 x i8> %49, i32 2) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = extractelement <4 x float> %71, i32 2 %75 = extractelement <4 x float> %71, i32 3 %76 = fcmp olt float %75, 5.000000e-01 %77 = select i1 %76, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %77) %78 = fmul float %75, %36 %79 = bitcast float %62 to i32 %80 = bitcast float %63 to i32 %81 = insertelement <2 x i32> undef, i32 %79, i32 0 %82 = insertelement <2 x i32> %81, i32 %80, i32 1 %83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %82, <32 x i8> %58, <16 x i8> %61, i32 2) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = bitcast float %62 to i32 %88 = bitcast float %63 to i32 %89 = insertelement <2 x i32> undef, i32 %87, i32 0 %90 = insertelement <2 x i32> %89, i32 %88, i32 1 %91 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %52, <16 x i8> %55, i32 2) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = fmul float %92, %24 %96 = fmul float %93, %25 %97 = fmul float %94, %26 %98 = fmul float %84, %27 %99 = fadd float %98, %95 %100 = fmul float %85, %28 %101 = fadd float %100, %96 %102 = fmul float %86, %29 %103 = fadd float %102, %97 %104 = fadd float %72, %99 %105 = fadd float %73, %101 %106 = fadd float %74, %103 %107 = bitcast float %62 to i32 %108 = bitcast float %63 to i32 %109 = insertelement <2 x i32> undef, i32 %107, i32 0 %110 = insertelement <2 x i32> %109, i32 %108, i32 1 %111 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %110, <32 x i8> %41, <16 x i8> %43, i32 2) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = extractelement <4 x float> %111, i32 2 %115 = fadd float %112, -5.000000e-01 %116 = fadd float %113, -5.000000e-01 %117 = fadd float %114, -5.000000e-01 %118 = fmul float %115, %115 %119 = fmul float %116, %116 %120 = fadd float %119, %118 %121 = fmul float %117, %117 %122 = fadd float %120, %121 %123 = call float @llvm.AMDGPU.rsq.clamped.f32(float %122) %124 = fmul float %115, %123 %125 = fmul float %116, %123 %126 = fmul float %117, %123 %127 = fmul float %64, %64 %128 = fmul float %65, %65 %129 = fadd float %128, %127 %130 = fmul float %66, %66 %131 = fadd float %129, %130 %132 = call float @llvm.AMDGPU.rsq.clamped.f32(float %131) %133 = fmul float %64, %132 %134 = fmul float %65, %132 %135 = fmul float %66, %132 %136 = fmul float %124, %133 %137 = fmul float %125, %134 %138 = fadd float %137, %136 %139 = fmul float %126, %135 %140 = fadd float %138, %139 %141 = call float @llvm.maxnum.f32(float %140, float 0.000000e+00) %142 = fmul float %33, %141 %143 = fmul float %34, %141 %144 = fmul float %35, %141 %145 = fmul float %142, %37 %146 = fadd float %145, %30 %147 = fmul float %143, %38 %148 = fadd float %147, %31 %149 = fmul float %144, %39 %150 = fadd float %149, %32 %151 = fmul float %104, %146 %152 = fmul float %105, %148 %153 = fmul float %106, %150 %154 = call i32 @llvm.SI.packf16(float %151, float %152) %155 = bitcast i32 %154 to float %156 = call i32 @llvm.SI.packf16(float %153, float %78) %157 = bitcast i32 %156 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %155, float %157, float %155, float %157) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s56, s[0:3], 0x15 ; C21C0115 s_buffer_load_dword s57, s[0:3], 0x16 ; C21C8116 image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[12:15] ; F0800F00 00680702 s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_gt_f32_e32 vcc, 0.5, v10 ; 7C0814F0 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[20:23] ; F0800700 00AC0B02 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[16:19] ; F0800700 008A0E02 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[8:11] ; F0800700 00460002 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v3, v14, s4, v7 ; D2820003 041C090E v_mad_f32 v3, v11, s7, v3 ; D2820003 040C0F0B v_mad_f32 v17, v15, s5, v8 ; D2820011 04200B0FSHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL CONST[0..9] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[6], IN[1].xxxx 2: MAD TEMP[1], CONST[7], IN[1].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[8], IN[1].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[9], IN[1].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: DP3 TEMP[1].x, CONST[5].xyzz, IN[2].xyzz 7: DP3 TEMP[2].x, CONST[5].xyzz, IN[3].xyzz 8: MOV TEMP[1].y, TEMP[2].xxxx 9: DP3 TEMP[2].x, CONST[5].xyzz, IN[4].xyzz 10: MOV TEMP[1].z, TEMP[2].xxxx 11: ADD TEMP[2].xyz, CONST[4].xyzz, -IN[0].xyzz 12: DP3 TEMP[3].x, TEMP[2].xyzz, IN[2].xyzz 13: DP3 TEMP[4].x, TEMP[2].xyzz, IN[3].xyzz 14: MOV TEMP[3].y, TEMP[4].xxxx 15: DP3 TEMP[2].x, TEMP[2].xyzz, IN[4].xyzz 16: MOV TEMP[3].z, TEMP[2].xxxx 17: MOV TEMP[3].w, IMM[0].xxxx 18: MOV TEMP[2].w, IMM[0].xxxx 19: MOV TEMP[2].xyz, IN[2].xyzx 20: MOV TEMP[4].w, IMM[0].xxxx 21: MOV TEMP[4].xyz, IN[3].xyzx 22: MOV TEMP[5].w, IMM[0].xxxx 23: MOV TEMP[5].xyz, IN[4].xyzx 24: MUL TEMP[6], CONST[0], IN[0].xxxx 25: MAD TEMP[6], CONST[1], IN[0].yyyy, TEMP[6] 26: MAD TEMP[6], CONST[2], IN[0].zzzz, TEMP[6] 27: MAD TEMP[6], CONST[3], IN[0].wwww, TEMP[6] 28: MOV TEMP[1].xyz, TEMP[1].xyzx 29: MOV OUT[6], TEMP[1] 30: MOV OUT[3], TEMP[5] 31: MOV OUT[4], TEMP[2] 32: MOV OUT[0], TEMP[6] 33: MOV OUT[5], TEMP[4] 34: MOV OUT[1], TEMP[3] 35: MOV OUT[2], TEMP[0] 36: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = add i32 %5, %7 %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %78) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = fmul float %35, %58 %84 = fmul float %36, %58 %85 = fmul float %37, %58 %86 = fmul float %38, %59 %87 = fadd float %86, %83 %88 = fmul float %39, %59 %89 = fadd float %88, %84 %90 = fmul float %40, %59 %91 = fadd float %90, %85 %92 = fmul float %41, %60 %93 = fadd float %92, %87 %94 = fmul float %42, %60 %95 = fadd float %94, %89 %96 = fmul float %43, %60 %97 = fadd float %96, %91 %98 = fmul float %44, %61 %99 = fadd float %98, %93 %100 = fmul float %45, %61 %101 = fadd float %100, %95 %102 = fmul float %32, %66 %103 = fmul float %33, %67 %104 = fadd float %103, %102 %105 = fmul float %34, %68 %106 = fadd float %104, %105 %107 = fmul float %32, %73 %108 = fmul float %33, %74 %109 = fadd float %108, %107 %110 = fmul float %34, %75 %111 = fadd float %109, %110 %112 = fmul float %32, %80 %113 = fmul float %33, %81 %114 = fadd float %113, %112 %115 = fmul float %34, %82 %116 = fadd float %114, %115 %117 = fsub float %29, %50 %118 = fsub float %30, %51 %119 = fsub float %31, %52 %120 = fmul float %117, %66 %121 = fmul float %118, %67 %122 = fadd float %121, %120 %123 = fmul float %119, %68 %124 = fadd float %122, %123 %125 = fmul float %117, %73 %126 = fmul float %118, %74 %127 = fadd float %126, %125 %128 = fmul float %119, %75 %129 = fadd float %127, %128 %130 = fmul float %117, %80 %131 = fmul float %118, %81 %132 = fadd float %131, %130 %133 = fmul float %119, %82 %134 = fadd float %132, %133 %135 = fmul float %13, %50 %136 = fmul float %14, %50 %137 = fmul float %15, %50 %138 = fmul float %16, %50 %139 = fmul float %17, %51 %140 = fadd float %139, %135 %141 = fmul float %18, %51 %142 = fadd float %141, %136 %143 = fmul float %19, %51 %144 = fadd float %143, %137 %145 = fmul float %20, %51 %146 = fadd float %145, %138 %147 = fmul float %21, %52 %148 = fadd float %147, %140 %149 = fmul float %22, %52 %150 = fadd float %149, %142 %151 = fmul float %23, %52 %152 = fadd float %151, %144 %153 = fmul float %24, %52 %154 = fadd float %153, %146 %155 = fmul float %25, %53 %156 = fadd float %155, %148 %157 = fmul float %26, %53 %158 = fadd float %157, %150 %159 = fmul float %27, %53 %160 = fadd float %159, %152 %161 = fmul float %28, %53 %162 = fadd float %161, %154 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %124, float %129, float %134, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %99, float %101, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %80, float %81, float %82, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %66, float %67, float %68, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %73, float %74, float %75, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %106, float %111, float %116, float %97) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %156, float %158, float %160, float %162) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[18:21], v0, s[8:11], 0 idxen ; E00C2000 80021200 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1b ; C206011B s_buffer_load_dword s13, s[0:3], 0x1c ; C206811C s_buffer_load_dword s14, s[0:3], 0x1d ; C207011D s_buffer_load_dword s15, s[0:3], 0x1f ; C207811F s_buffer_load_dword s16, s[0:3], 0x20 ; C2080120 s_buffer_load_dword s17, s[0:3], 0x21 ; C2088121 s_buffer_load_dword s18, s[0:3], 0x23 ; C2090123 s_buffer_load_dword s19, s[0:3], 0x24 ; C2098124 s_buffer_load_dword s20, s[0:3], 0x25 ; C20A0125 s_buffer_load_dword s21, s[0:3], 0x0 ; C20A8100 s_buffer_load_dword s22, s[0:3], 0x1 ; C20B0101 s_buffer_load_dword s23, s[0:3], 0x2 ; C20B8102 s_buffer_load_dword s25, s[0:3], 0x3 ; C20C8103 s_buffer_load_dword s26, s[0:3], 0x4 ; C20D0104 s_buffer_load_dword s27, s[0:3], 0x5 ; C20D8105 s_buffer_load_dword s28, s[0:3], 0x6 ; C20E0106 s_buffer_load_dword s29, s[0:3], 0x7 ; C20E8107 s_buffer_load_dword s30, s[0:3], 0x8 ; C20F0108 s_buffer_load_dword s31, s[0:3], 0x9 ; C20F8109 s_buffer_load_dword s32, s[0:3], 0xa ; C210010A s_buffer_load_dword s33, s[0:3], 0xb ; C210810B s_buffer_load_dword s34, s[0:3], 0xc ; C211010C s_buffer_load_dword s35, s[0:3], 0xd ; C211810D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s21, v2 ; 10000415 v_mul_f32_e32 v22, s10, v6 ; 102C0C0A v_mad_f32 v22, s13, v7, v22 ; D2820016 045A0E0D v_mul_f32_e32 v23, s11, v6 ; 102E0C0B v_mad_f32 v23, s14, v7, v23 ; D2820017 045E0E0E v_mul_f32_e32 v24, s12, v6 ; 10300C0C v_mad_f32 v24, s15, v7, v24 ; D2820018 04620E0F v_mad_f32 v22, s16, v8, v22 ; D2820016 045A1010 v_mad_f32 v23, s17, v8, v23 ; D2820017 045E1011 v_mad_f32 v24, s18, v8, v24 ; D2820018 04621012 v_mad_f32 v22, s19, v9, v22 ; D2820016 045A1213 v_mad_f32 v6, s20, v9, v23 ; D2820006 045E1214 v_mad_f32 v0, s26, v3, v0 ; D2820000 0402061A v_mul_f32_e32 v7, s22, v2 ; 100E0416 v_mad_f32 v7, s27, v3, v7 ; D2820007 041E061B v_mul_f32_e32 v8, s23, v2 ; 10100417 v_mad_f32 v8, s28, v3, v8 ; D2820008 0422061C v_mul_f32_e32 v9, s25, v2 ; 10120419 v_mad_f32 v9, s29, v3, v9 ; D2820009 0426061D v_mad_f32 v0, s30, v4, v0 ; D2820000 0402081E v_mad_f32 v7, s31, v4, v7 ; D2820007 041E081F v_mad_f32 v8, s32, v4, v8 ; D2820008 04220820 v_mad_f32 v9, s33, v4, v9 ; D2820009 04260821 v_mad_f32 v0, s34, v5, v0 ; D2820000 04020A22 v_mad_f32 v7, s35, v5, v7 ; D2820007 041E0A23 v_mad_f32 v8, s0, v5, v8 ; D2820008 04220A00 v_mad_f32 v9, s24, v5, v9 ; D2820009 04260A18 v_sub_f32_e32 v23, s4, v2 ; 082E0404 v_sub_f32_e32 v25, s5, v3 ; 08320605 v_sub_f32_e32 v2, s6, v4 ; 08040806 v_mul_f32_e32 v3, v10, v23 ; 10062F0A v_mul_f32_e32 v4, v14, v23 ; 10082F0E v_mul_f32_e32 v5, v18, v23 ; 100A2F12 v_mad_f32 v3, v25, v11, v3 ; D2820003 040E1719 v_mad_f32 v4, v25, v15, v4 ; D2820004 04121F19 v_mad_f32 v5, v25, v19, v5 ; D2820005 04162719 v_mad_f32 v3, v2, v12, v3 ; D2820003 040E1902 v_mad_f32 v4, v2, v16, v4 ; D2820004 04122102 v_mad_f32 v2, v2, v20, v5 ; D2820002 04162902 exp 15, 32, 0, 0, 0, v3, v4, v2, v1 ; F800020F 01020403 exp 15, 33, 0, 0, 0, v22, v6, v1, v1 ; F800021F 01010616 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s7, v18 ; 10042407 v_mad_f32 v2, s8, v19, v2 ; D2820002 040A2608 v_mad_f32 v2, s9, v20, v2 ; D2820002 040A2809 exp 15, 34, 0, 0, 0, v18, v19, v20, v1 ; F800022F 01141312 v_mul_f32_e32 v3, s7, v10 ; 10061407 v_mul_f32_e32 v4, s7, v14 ; 10081C07 v_mad_f32 v3, s8, v11, v3 ; D2820003 040E1608 v_mad_f32 v4, s8, v15, v4 ; D2820004 04121E08 v_mad_f32 v3, s9, v12, v3 ; D2820003 040E1809 v_mad_f32 v4, s9, v16, v4 ; D2820004 04122009 exp 15, 35, 0, 0, 0, v10, v11, v12, v1 ; F800023F 010C0B0A exp 15, 36, 0, 0, 0, v14, v15, v16, v1 ; F800024F 01100F0E exp 15, 37, 0, 0, 0, v3, v4, v2, v24 ; F800025F 18020403 exp 15, 12, 0, 1, 0, v0, v7, v8, v9 ; F80008CF 09080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 28 Code Size: 596 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[3..10] DCL CONST[13] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { -0.5000, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[6].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MOV TEMP[2].xy, IN[1].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D 6: ADD TEMP[2].xyz, TEMP[2].xyzz, IMM[0].xxxx 7: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 8: RSQ TEMP[3].x, TEMP[3].xxxx 9: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 10: MOV TEMP[3].xyz, -IN[0].xyzx 11: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[3].xyzz 12: MUL TEMP[4].xyz, TEMP[4].xxxx, TEMP[2].xyzz 13: MUL TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz 14: ADD TEMP[3].xyz, TEMP[3].xyzz, -TEMP[4].xyzz 15: MUL TEMP[4].xyz, TEMP[3].xxxx, IN[3].xyzz 16: MAD TEMP[4].xyz, TEMP[3].yyyy, IN[4].xyzz, TEMP[4].xyzz 17: MAD TEMP[3].xyz, TEMP[3].zzzz, IN[2].xyzz, TEMP[4].xyzz 18: MOV TEMP[4].xy, IN[1].xyyy 19: TEX TEMP[4].xyz, TEMP[4], SAMP[3], 2D 20: MUL TEMP[5], CONST[7], TEMP[3].xxxx 21: MAD TEMP[5], CONST[8], TEMP[3].yyyy, TEMP[5] 22: MAD TEMP[3].xyz, CONST[9], TEMP[3].zzzz, TEMP[5] 23: MOV TEMP[3].xyz, TEMP[3].xyzz 24: TEX TEMP[3].xyz, TEMP[3], SAMP[4], CUBE 25: MAD TEMP[0].xyz, TEMP[4].xyzz, TEMP[3].xyzz, TEMP[0].xyzz 26: DP3 TEMP[3].x, IN[5].xyzz, IN[5].xyzz 27: RSQ TEMP[3].x, TEMP[3].xxxx 28: MUL TEMP[3].xyz, IN[5].xyzz, TEMP[3].xxxx 29: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[3].xyzz 30: MAX TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz 31: MUL TEMP[2].xyz, CONST[4].xyzz, TEMP[2].xxxx 32: MAD TEMP[2].xyz, TEMP[2].xyzz, CONST[13].xyzz, CONST[3].xyzz 33: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 34: MOV TEMP[0].xy, IN[1].xyyy 35: TEX TEMP[0].xyz, TEMP[0], SAMP[2], 2D 36: MAD TEMP[1].xyz, TEMP[0].xyzz, CONST[5].xyzz, TEMP[1].xyzz 37: MOV OUT[0], TEMP[1] 38: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %46 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %47 = load <32 x i8>, <32 x i8> addrspace(2)* %46, align 32, !tbaa !0 %48 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %51 = bitcast <8 x i32> addrspace(2)* %50 to <32 x i8> addrspace(2)* %52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %54 = bitcast <4 x i32> addrspace(2)* %53 to <16 x i8> addrspace(2)* %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %57 = bitcast <8 x i32> addrspace(2)* %56 to <32 x i8> addrspace(2)* %58 = load <32 x i8>, <32 x i8> addrspace(2)* %57, align 32, !tbaa !0 %59 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %60 = bitcast <4 x i32> addrspace(2)* %59 to <16 x i8> addrspace(2)* %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)* %64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)* %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %69 = bitcast <8 x i32> addrspace(2)* %68 to <32 x i8> addrspace(2)* %70 = load <32 x i8>, <32 x i8> addrspace(2)* %69, align 32, !tbaa !0 %71 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %72 = bitcast <4 x i32> addrspace(2)* %71 to <16 x i8> addrspace(2)* %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %91 = bitcast float %77 to i32 %92 = bitcast float %78 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %52, <16 x i8> %55, i32 2) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = extractelement <4 x float> %95, i32 3 %100 = fmul float %99, %33 %101 = bitcast float %77 to i32 %102 = bitcast float %78 to i32 %103 = insertelement <2 x i32> undef, i32 %101, i32 0 %104 = insertelement <2 x i32> %103, i32 %102, i32 1 %105 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %104, <32 x i8> %47, <16 x i8> %49, i32 2) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = extractelement <4 x float> %105, i32 2 %109 = fadd float %106, -5.000000e-01 %110 = fadd float %107, -5.000000e-01 %111 = fadd float %108, -5.000000e-01 %112 = fmul float %109, %109 %113 = fmul float %110, %110 %114 = fadd float %113, %112 %115 = fmul float %111, %111 %116 = fadd float %114, %115 %117 = call float @llvm.AMDGPU.rsq.clamped.f32(float %116) %118 = fmul float %109, %117 %119 = fmul float %110, %117 %120 = fmul float %111, %117 %121 = fmul float %74, %118 %122 = fsub float -0.000000e+00, %121 %123 = fmul float %75, %119 %124 = fsub float %122, %123 %125 = fmul float %76, %120 %126 = fsub float %124, %125 %127 = fmul float %126, %118 %128 = fmul float %126, %119 %129 = fmul float %126, %120 %130 = fmul float %127, 2.000000e+00 %131 = fmul float %128, 2.000000e+00 %132 = fmul float %129, 2.000000e+00 %133 = fsub float -0.000000e+00, %130 %134 = fsub float %133, %74 %135 = fsub float -0.000000e+00, %131 %136 = fsub float %135, %75 %137 = fsub float -0.000000e+00, %132 %138 = fsub float %137, %76 %139 = fmul float %134, %82 %140 = fmul float %134, %83 %141 = fmul float %134, %84 %142 = fmul float %136, %85 %143 = fadd float %142, %139 %144 = fmul float %136, %86 %145 = fadd float %144, %140 %146 = fmul float %136, %87 %147 = fadd float %146, %141 %148 = fmul float %138, %79 %149 = fadd float %148, %143 %150 = fmul float %138, %80 %151 = fadd float %150, %145 %152 = fmul float %138, %81 %153 = fadd float %152, %147 %154 = bitcast float %77 to i32 %155 = bitcast float %78 to i32 %156 = insertelement <2 x i32> undef, i32 %154, i32 0 %157 = insertelement <2 x i32> %156, i32 %155, i32 1 %158 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %157, <32 x i8> %64, <16 x i8> %67, i32 2) %159 = extractelement <4 x float> %158, i32 0 %160 = extractelement <4 x float> %158, i32 1 %161 = extractelement <4 x float> %158, i32 2 %162 = fmul float %34, %149 %163 = fmul float %35, %149 %164 = fmul float %36, %149 %165 = fmul float %37, %151 %166 = fadd float %165, %162 %167 = fmul float %38, %151 %168 = fadd float %167, %163 %169 = fmul float %39, %151 %170 = fadd float %169, %164 %171 = fmul float %40, %153 %172 = fadd float %171, %166 %173 = fmul float %41, %153 %174 = fadd float %173, %168 %175 = fmul float %42, %153 %176 = fadd float %175, %170 %177 = insertelement <4 x float> undef, float %172, i32 0 %178 = insertelement <4 x float> %177, float %174, i32 1 %179 = insertelement <4 x float> %178, float %176, i32 2 %180 = insertelement <4 x float> %179, float 0.000000e+00, i32 3 %181 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %180) %182 = extractelement <4 x float> %181, i32 0 %183 = extractelement <4 x float> %181, i32 1 %184 = extractelement <4 x float> %181, i32 2 %185 = extractelement <4 x float> %181, i32 3 %186 = call float @fabs(float %184) %187 = fdiv float 1.000000e+00, %186 %188 = fmul float %182, %187 %189 = fadd float %188, 1.500000e+00 %190 = fmul float %183, %187 %191 = fadd float %190, 1.500000e+00 %192 = bitcast float %191 to i32 %193 = bitcast float %189 to i32 %194 = bitcast float %185 to i32 %195 = insertelement <4 x i32> undef, i32 %192, i32 0 %196 = insertelement <4 x i32> %195, i32 %193, i32 1 %197 = insertelement <4 x i32> %196, i32 %194, i32 2 %198 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %197, <32 x i8> %70, <16 x i8> %73, i32 4) %199 = extractelement <4 x float> %198, i32 0 %200 = extractelement <4 x float> %198, i32 1 %201 = extractelement <4 x float> %198, i32 2 %202 = fmul float %159, %199 %203 = fadd float %202, %96 %204 = fmul float %160, %200 %205 = fadd float %204, %97 %206 = fmul float %161, %201 %207 = fadd float %206, %98 %208 = fmul float %88, %88 %209 = fmul float %89, %89 %210 = fadd float %209, %208 %211 = fmul float %90, %90 %212 = fadd float %210, %211 %213 = call float @llvm.AMDGPU.rsq.clamped.f32(float %212) %214 = fmul float %88, %213 %215 = fmul float %89, %213 %216 = fmul float %90, %213 %217 = fmul float %118, %214 %218 = fmul float %119, %215 %219 = fadd float %218, %217 %220 = fmul float %120, %216 %221 = fadd float %219, %220 %222 = call float @llvm.maxnum.f32(float %221, float 0.000000e+00) %223 = fmul float %27, %222 %224 = fmul float %28, %222 %225 = fmul float %29, %222 %226 = fmul float %223, %43 %227 = fadd float %226, %24 %228 = fmul float %224, %44 %229 = fadd float %228, %25 %230 = fmul float %225, %45 %231 = fadd float %230, %26 %232 = fmul float %203, %227 %233 = fmul float %205, %229 %234 = fmul float %207, %231 %235 = bitcast float %77 to i32 %236 = bitcast float %78 to i32 %237 = insertelement <2 x i32> undef, i32 %235, i32 0 %238 = insertelement <2 x i32> %237, i32 %236, i32 1 %239 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %238, <32 x i8> %58, <16 x i8> %61, i32 2) %240 = extractelement <4 x float> %239, i32 0 %241 = extractelement <4 x float> %239, i32 1 %242 = extractelement <4 x float> %239, i32 2 %243 = fmul float %240, %30 %244 = fadd float %243, %232 %245 = fmul float %241, %31 %246 = fadd float %245, %233 %247 = fmul float %242, %32 %248 = fadd float %247, %234 %249 = call i32 @llvm.SI.packf16(float %244, float %246) %250 = bitcast i32 %249 to float %251 = call i32 @llvm.SI.packf16(float %248, float %100) %252 = bitcast i32 %251 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %250, float %252, float %250, float %252) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[44:47], s[4:5], 0x0 ; C0960500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s68, s[0:3], 0x1c ; C222011C s_buffer_load_dword s69, s[0:3], 0x1d ; C222811D s_buffer_load_dword s70, s[0:3], 0x1e ; C223011E s_buffer_load_dword s71, s[0:3], 0x20 ; C2238120 s_buffer_load_dword s72, s[0:3], 0x21 ; C2240121 s_buffer_load_dword s73, s[0:3], 0x22 ; C2248122 s_buffer_load_dword s74, s[0:3], 0x24 ; C2250124 s_buffer_load_dword s75, s[0:3], 0x25 ; C2258125 s_buffer_load_dword s76, s[0:3], 0x26 ; C2260126 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 0, 3, [m0] ; C8280C00 v_interp_p2_f32 v10, [v10], v1, 0, 3, [m0] ; C8290C01 v_interp_p1_f32 v11, v0, 1, 3, [m0] ; C82C0D00 v_interp_p2_f32 v11, [v11], v1, 1, 3, [m0] ; C82D0D01 v_interp_p1_f32 v12, v0, 2, 3, [m0] ; C8300E00 v_interp_p2_f32 v12, [v12], v1, 2, 3, [m0] ; C8310E01 v_interp_p1_f32 v13, v0, 0, 4, [m0] ; C8341000 v_interp_p2_f32 v13, [v13], v1, 0, 4, [m0] ; C8351001 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v15, v0, 2, 4, [m0] ; C83C1200 v_interp_p2_f32 v15, [v15], v1, 2, 4, [m0] ; C83D1201 v_interp_p1_f32 v16, v0, 0, 5, [m0] ; C8401400 v_interp_p2_f32 v16, [v16], v1, 0, 5, [m0] ; C8411401 v_interp_p1_f32 v17, v0, 1, 5, [m0] ; C8441500 v_interp_p2_f32 v17, [v17], v1, 1, 5, [m0] ; C8451501 v_interp_p1_f32 v18, v0, 2, 5, [m0] ; C8481600 v_interp_p2_f32 v18, [v18], v1, 2, 5, [m0] ; C8491601 s_load_dwordx4 s[56:59], s[4:5], 0x4 ; C09C0504 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx4 s[20:23], s[4:5], 0x10 ; C08A0510 s_load_dwordx8 s[48:55], s[6:7], 0x0 ; C0D80700 s_load_dwordx8 s[60:67], s[6:7], 0x8 ; C0DE0708 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 s_load_dwordx8 s[28:35], s[6:7], 0x20 ; C0CE0720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[60:67], s[56:59] ; F0800F00 01CF1305 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[48:55], s[44:47] ; F0800700 016C1705 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, -0.5, v23 ; 06002EF1 v_add_f32_e32 v1, -0.5, v24 ; 060230F1 v_add_f32_e32 v23, -0.5, v25 ; 062E32F1 v_mul_f32_e32 v24, v0, v0 ; 10300100 v_mad_f32 v24, v1, v1, v24 ; D2820018 04620301 v_mad_f32 v24, v23, v23, v24 ; D2820018 04622F17 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v0, v24, v0 ; 10000118 v_mul_f32_e32 v1, v24, v1 ; 10020318 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v24, v0, v2 ; 10300500 v_mad_f32 v24, -v3, v1, -v24 ; D2820018 A4620303 v_mad_f32 v24, -v4, v23, v24 ; D2820018 24622F04 v_mul_f32_e32 v25, v0, v24 ; 10323100 v_mad_f32 v25, v24, v0, v25 ; D2820019 04660118 v_mul_f32_e32 v26, v1, v24 ; 10343101 v_mad_f32 v26, v24, v1, v26 ; D282001A 046A0318 v_mul_f32_e32 v27, v23, v24 ; 10363117 v_mad_f32 v24, v24, v23, v27 ; D2820018 046E2F18 v_sub_f32_e64 v2, -v25, v2 ; D2080002 20020519SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[5], IN[1].xxxx 2: MAD TEMP[1], CONST[6], IN[1].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[7], IN[1].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[8], IN[1].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: DP3 TEMP[1].x, CONST[4].xyzz, IN[2].xyzz 7: DP3 TEMP[2].x, CONST[4].xyzz, IN[3].xyzz 8: MOV TEMP[1].y, TEMP[2].xxxx 9: DP3 TEMP[2].x, CONST[4].xyzz, IN[4].xyzz 10: MOV TEMP[1].z, TEMP[2].xxxx 11: MUL TEMP[2], CONST[0], IN[0].xxxx 12: MAD TEMP[2], CONST[1], IN[0].yyyy, TEMP[2] 13: MAD TEMP[2], CONST[2], IN[0].zzzz, TEMP[2] 14: MAD TEMP[2], CONST[3], IN[0].wwww, TEMP[2] 15: MOV TEMP[1].xyz, TEMP[1].xyzx 16: MOV OUT[2], TEMP[1] 17: MOV OUT[0], TEMP[2] 18: MOV OUT[1], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = add i32 %5, %7 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = fmul float %32, %55 %81 = fmul float %33, %55 %82 = fmul float %34, %55 %83 = fmul float %35, %56 %84 = fadd float %83, %80 %85 = fmul float %36, %56 %86 = fadd float %85, %81 %87 = fmul float %37, %56 %88 = fadd float %87, %82 %89 = fmul float %38, %57 %90 = fadd float %89, %84 %91 = fmul float %39, %57 %92 = fadd float %91, %86 %93 = fmul float %40, %57 %94 = fadd float %93, %88 %95 = fmul float %41, %58 %96 = fadd float %95, %90 %97 = fmul float %42, %58 %98 = fadd float %97, %92 %99 = fmul float %29, %63 %100 = fmul float %30, %64 %101 = fadd float %100, %99 %102 = fmul float %31, %65 %103 = fadd float %101, %102 %104 = fmul float %29, %70 %105 = fmul float %30, %71 %106 = fadd float %105, %104 %107 = fmul float %31, %72 %108 = fadd float %106, %107 %109 = fmul float %29, %77 %110 = fmul float %30, %78 %111 = fadd float %110, %109 %112 = fmul float %31, %79 %113 = fadd float %111, %112 %114 = fmul float %13, %47 %115 = fmul float %14, %47 %116 = fmul float %15, %47 %117 = fmul float %16, %47 %118 = fmul float %17, %48 %119 = fadd float %118, %114 %120 = fmul float %18, %48 %121 = fadd float %120, %115 %122 = fmul float %19, %48 %123 = fadd float %122, %116 %124 = fmul float %20, %48 %125 = fadd float %124, %117 %126 = fmul float %21, %49 %127 = fadd float %126, %119 %128 = fmul float %22, %49 %129 = fadd float %128, %121 %130 = fmul float %23, %49 %131 = fadd float %130, %123 %132 = fmul float %24, %49 %133 = fadd float %132, %125 %134 = fmul float %25, %50 %135 = fadd float %134, %127 %136 = fmul float %26, %50 %137 = fadd float %136, %129 %138 = fmul float %27, %50 %139 = fadd float %138, %131 %140 = fmul float %28, %50 %141 = fadd float %140, %133 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %96, float %98, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %103, float %108, float %113, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %135, float %137, float %139, float %141) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[18:21], v0, s[8:11], 0 idxen ; E00C2000 80021200 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1b ; C206011B s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x1c ; C209011C s_buffer_load_dword s19, s[0:3], 0x1d ; C209811D s_buffer_load_dword s20, s[0:3], 0x1f ; C20A011F s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 s_buffer_load_dword s23, s[0:3], 0x5 ; C20B8105 s_buffer_load_dword s25, s[0:3], 0x6 ; C20C8106 s_buffer_load_dword s26, s[0:3], 0x7 ; C20D0107 s_buffer_load_dword s27, s[0:3], 0x8 ; C20D8108 s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109 s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A s_buffer_load_dword s30, s[0:3], 0xb ; C20F010B s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C s_buffer_load_dword s32, s[0:3], 0xd ; C210010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s13, v2 ; 1000040D v_mul_f32_e32 v22, s7, v6 ; 102C0C07 v_mul_f32_e32 v23, s4, v10 ; 102E1404 v_mad_f32 v23, s5, v11, v23 ; D2820017 045E1605 v_mad_f32 v10, s6, v12, v23 ; D282000A 045E1806 v_mul_f32_e32 v11, s4, v14 ; 10161C04 v_mad_f32 v11, s5, v15, v11 ; D282000B 042E1E05 v_mad_f32 v11, s6, v16, v11 ; D282000B 042E2006 v_mul_f32_e32 v12, s4, v18 ; 10182404 v_mad_f32 v12, s5, v19, v12 ; D282000C 04322605 v_mad_f32 v12, s6, v20, v12 ; D282000C 04322806 v_mad_f32 v13, s10, v7, v22 ; D282000D 045A0E0A v_mul_f32_e32 v14, s8, v6 ; 101C0C08 v_mad_f32 v14, s11, v7, v14 ; D282000E 043A0E0B v_mul_f32_e32 v15, s9, v6 ; 101E0C09 v_mad_f32 v15, s12, v7, v15 ; D282000F 043E0E0C v_mad_f32 v13, s18, v8, v13 ; D282000D 04361012 v_mad_f32 v14, s19, v8, v14 ; D282000E 043A1013 v_mad_f32 v15, s20, v8, v15 ; D282000F 043E1014 v_mad_f32 v13, s21, v9, v13 ; D282000D 04361215 v_mad_f32 v6, s22, v9, v14 ; D2820006 043A1216 v_mad_f32 v0, s17, v3, v0 ; D2820000 04020611 v_mul_f32_e32 v7, s14, v2 ; 100E040E v_mad_f32 v7, s23, v3, v7 ; D2820007 041E0617 v_mul_f32_e32 v8, s15, v2 ; 1010040F v_mad_f32 v8, s25, v3, v8 ; D2820008 04220619 v_mul_f32_e32 v9, s16, v2 ; 10120410 v_mad_f32 v9, s26, v3, v9 ; D2820009 0426061A v_mad_f32 v0, s27, v4, v0 ; D2820000 0402081B v_mad_f32 v7, s28, v4, v7 ; D2820007 041E081C v_mad_f32 v8, s29, v4, v8 ; D2820008 0422081D v_mad_f32 v9, s30, v4, v9 ; D2820009 0426081E v_mad_f32 v0, s31, v5, v0 ; D2820000 04020A1F v_mad_f32 v7, s32, v5, v7 ; D2820007 041E0A20 v_mad_f32 v8, s0, v5, v8 ; D2820008 04220A00 v_mad_f32 v2, s24, v5, v9 ; D2820002 04260A18 exp 15, 32, 0, 0, 0, v13, v6, v1, v1 ; F800020F 0101060D exp 15, 33, 0, 0, 0, v10, v11, v12, v15 ; F800021F 0F0C0B0A exp 15, 12, 0, 1, 0, v0, v7, v8, v2 ; F80008CF 02080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 24 Code Size: 476 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[3..7] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { -0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[6].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D 6: ADD TEMP[2].xyz, TEMP[2].xyzz, IMM[0].xxxx 7: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 8: RSQ TEMP[3].x, TEMP[3].xxxx 9: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 10: DP3 TEMP[3].x, IN[1].xyzz, IN[1].xyzz 11: RSQ TEMP[3].x, TEMP[3].xxxx 12: MUL TEMP[3].xyz, IN[1].xyzz, TEMP[3].xxxx 13: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[3].xyzz 14: MAX TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 15: MUL TEMP[2].xyz, CONST[4].xyzz, TEMP[2].xxxx 16: MAD TEMP[2].xyz, TEMP[2].xyzz, CONST[7].xyzz, CONST[3].xyzz 17: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 18: MOV TEMP[0].xy, IN[0].xyyy 19: TEX TEMP[0].xyz, TEMP[0], SAMP[2], 2D 20: MAD TEMP[1].xyz, TEMP[0].xyzz, CONST[5].xyzz, TEMP[1].xyzz 21: MOV OUT[0], TEMP[1] 22: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %37 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, align 32, !tbaa !0 %39 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %42 = bitcast <8 x i32> addrspace(2)* %41 to <32 x i8> addrspace(2)* %43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %45 = bitcast <4 x i32> addrspace(2)* %44 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %48 = bitcast <8 x i32> addrspace(2)* %47 to <32 x i8> addrspace(2)* %49 = load <32 x i8>, <32 x i8> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %51 = bitcast <4 x i32> addrspace(2)* %50 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %58 = bitcast float %53 to i32 %59 = bitcast float %54 to i32 %60 = insertelement <2 x i32> undef, i32 %58, i32 0 %61 = insertelement <2 x i32> %60, i32 %59, i32 1 %62 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %61, <32 x i8> %43, <16 x i8> %46, i32 2) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = extractelement <4 x float> %62, i32 3 %67 = fmul float %66, %33 %68 = bitcast float %53 to i32 %69 = bitcast float %54 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %71, <32 x i8> %38, <16 x i8> %40, i32 2) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = fadd float %73, -5.000000e-01 %77 = fadd float %74, -5.000000e-01 %78 = fadd float %75, -5.000000e-01 %79 = fmul float %76, %76 %80 = fmul float %77, %77 %81 = fadd float %80, %79 %82 = fmul float %78, %78 %83 = fadd float %81, %82 %84 = call float @llvm.AMDGPU.rsq.clamped.f32(float %83) %85 = fmul float %76, %84 %86 = fmul float %77, %84 %87 = fmul float %78, %84 %88 = fmul float %55, %55 %89 = fmul float %56, %56 %90 = fadd float %89, %88 %91 = fmul float %57, %57 %92 = fadd float %90, %91 %93 = call float @llvm.AMDGPU.rsq.clamped.f32(float %92) %94 = fmul float %55, %93 %95 = fmul float %56, %93 %96 = fmul float %57, %93 %97 = fmul float %85, %94 %98 = fmul float %86, %95 %99 = fadd float %98, %97 %100 = fmul float %87, %96 %101 = fadd float %99, %100 %102 = call float @llvm.maxnum.f32(float %101, float 0.000000e+00) %103 = fmul float %27, %102 %104 = fmul float %28, %102 %105 = fmul float %29, %102 %106 = fmul float %103, %34 %107 = fadd float %106, %24 %108 = fmul float %104, %35 %109 = fadd float %108, %25 %110 = fmul float %105, %36 %111 = fadd float %110, %26 %112 = fmul float %63, %107 %113 = fmul float %64, %109 %114 = fmul float %65, %111 %115 = bitcast float %53 to i32 %116 = bitcast float %54 to i32 %117 = insertelement <2 x i32> undef, i32 %115, i32 0 %118 = insertelement <2 x i32> %117, i32 %116, i32 1 %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %49, <16 x i8> %52, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = fmul float %120, %30 %124 = fadd float %123, %112 %125 = fmul float %121, %31 %126 = fadd float %125, %113 %127 = fmul float %122, %32 %128 = fadd float %127, %114 %129 = call i32 @llvm.SI.packf16(float %124, float %126) %130 = bitcast i32 %129 to float %131 = call i32 @llvm.SI.packf16(float %128, float %67) %132 = bitcast i32 %131 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %130, float %132, float %130, float %132) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[16:19] ; F0800F00 00870702 image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800700 00450B02 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[12:15] ; F0800700 00690002 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v3, -0.5, v11 ; 060616F1 v_add_f32_e32 v14, -0.5, v12 ; 061C18F1 v_add_f32_e32 v11, -0.5, v13 ; 06161AF1 v_mul_f32_e32 v12, v3, v3 ; 10180703 v_mad_f32 v12, v14, v14, v12 ; D282000C 04321D0E v_mad_f32 v12, v11, v11, v12 ; D282000C 0432170B v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C v_mul_f32_e32 v13, v4, v4 ; 101A0904 v_mad_f32 v13, v5, v5, v13 ; D282000D 04360B05 v_mad_f32 v13, v6, v6, v13 ; D282000D 04360D06 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_buffer_load_dword s9, s[0:3], 0x12 ; C2048112 s_buffer_load_dword s10, s[0:3], 0x14 ; C2050114SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[4], IN[1].xxxx 2: MAD TEMP[1], CONST[5], IN[1].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[6], IN[1].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[7], IN[1].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: MUL TEMP[1], CONST[0], IN[0].xxxx 7: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 9: MAD TEMP[1], CONST[3], IN[0].wwww, TEMP[1] 10: MOV OUT[0], TEMP[1] 11: MOV OUT[1], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %29, %49 %54 = fmul float %30, %49 %55 = fmul float %31, %50 %56 = fadd float %55, %53 %57 = fmul float %32, %50 %58 = fadd float %57, %54 %59 = fmul float %33, %51 %60 = fadd float %59, %56 %61 = fmul float %34, %51 %62 = fadd float %61, %58 %63 = fmul float %35, %52 %64 = fadd float %63, %60 %65 = fmul float %36, %52 %66 = fadd float %65, %62 %67 = fmul float %13, %41 %68 = fmul float %14, %41 %69 = fmul float %15, %41 %70 = fmul float %16, %41 %71 = fmul float %17, %42 %72 = fadd float %71, %67 %73 = fmul float %18, %42 %74 = fadd float %73, %68 %75 = fmul float %19, %42 %76 = fadd float %75, %69 %77 = fmul float %20, %42 %78 = fadd float %77, %70 %79 = fmul float %21, %43 %80 = fadd float %79, %72 %81 = fmul float %22, %43 %82 = fadd float %81, %74 %83 = fmul float %23, %43 %84 = fadd float %83, %76 %85 = fmul float %24, %43 %86 = fadd float %85, %78 %87 = fmul float %25, %44 %88 = fadd float %87, %80 %89 = fmul float %26, %44 %90 = fadd float %89, %82 %91 = fmul float %27, %44 %92 = fadd float %91, %84 %93 = fmul float %28, %44 %94 = fadd float %93, %86 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %64, float %66, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %88, float %90, float %92, float %94) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0xf ; C206010F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_buffer_load_dword s7, s[0:3], 0x15 ; C2038115 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104 s_buffer_load_dword s14, s[0:3], 0x5 ; C2070105 s_buffer_load_dword s15, s[0:3], 0x6 ; C2078106 s_buffer_load_dword s16, s[0:3], 0x7 ; C2080107 s_buffer_load_dword s17, s[0:3], 0x8 ; C2088108 s_buffer_load_dword s18, s[0:3], 0x9 ; C2090109 s_buffer_load_dword s19, s[0:3], 0x18 ; C2098118 s_buffer_load_dword s20, s[0:3], 0x19 ; C20A0119 s_buffer_load_dword s21, s[0:3], 0x1c ; C20A811C s_buffer_load_dword s22, s[0:3], 0x1d ; C20B011D s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s8, v1 ; 10000208 v_mul_f32_e32 v9, s4, v5 ; 10120A04 v_mad_f32 v9, s6, v6, v9 ; D2820009 04260C06 v_mul_f32_e32 v10, s5, v5 ; 10140A05 v_mad_f32 v10, s7, v6, v10 ; D282000A 042A0C07 v_mad_f32 v9, s19, v7, v9 ; D2820009 04260E13 v_mad_f32 v10, s20, v7, v10 ; D282000A 042A0E14 v_mad_f32 v9, s21, v8, v9 ; D2820009 04261015 v_mad_f32 v5, s22, v8, v10 ; D2820005 042A1016 v_mad_f32 v0, s13, v2, v0 ; D2820000 0402040D v_mul_f32_e32 v6, s9, v1 ; 100C0209 v_mad_f32 v6, s14, v2, v6 ; D2820006 041A040E v_mul_f32_e32 v7, s10, v1 ; 100E020A v_mad_f32 v7, s15, v2, v7 ; D2820007 041E040F v_mul_f32_e32 v8, s11, v1 ; 1010020B v_mad_f32 v8, s16, v2, v8 ; D2820008 04220410 v_mad_f32 v0, s17, v3, v0 ; D2820000 04020611 v_mad_f32 v6, s18, v3, v6 ; D2820006 041A0612 v_mad_f32 v7, s23, v3, v7 ; D2820007 041E0617 v_mad_f32 v8, s24, v3, v8 ; D2820008 04220618 v_mad_f32 v0, s25, v4, v0 ; D2820000 04020819 v_mad_f32 v6, s26, v4, v6 ; D2820006 041A081A v_mad_f32 v7, s0, v4, v7 ; D2820007 041E0800 v_mad_f32 v1, s12, v4, v8 ; D2820001 0422080C v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v9, v5, v2, v2 ; F800020F 02020509 exp 15, 12, 0, 1, 0, v0, v6, v7, v1 ; F80008CF 01070600 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 328 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[2..4] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[4].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MUL TEMP[1].xyz, TEMP[0].xyzz, CONST[2].xyzz 5: MOV TEMP[0].xy, IN[0].xyyy 6: TEX TEMP[0].xyz, TEMP[0], SAMP[1], 2D 7: MAD TEMP[1].xyz, TEMP[0].xyzz, CONST[3].xyzz, TEMP[1].xyzz 8: MOV OUT[0], TEMP[1] 9: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %36 = bitcast <8 x i32> addrspace(2)* %35 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %39 = bitcast <4 x i32> addrspace(2)* %38 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %43 = bitcast float %41 to i32 %44 = bitcast float %42 to i32 %45 = insertelement <2 x i32> undef, i32 %43, i32 0 %46 = insertelement <2 x i32> %45, i32 %44, i32 1 %47 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %32, <16 x i8> %34, i32 2) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = fmul float %51, %30 %53 = fmul float %48, %24 %54 = fmul float %49, %25 %55 = fmul float %50, %26 %56 = bitcast float %41 to i32 %57 = bitcast float %42 to i32 %58 = insertelement <2 x i32> undef, i32 %56, i32 0 %59 = insertelement <2 x i32> %58, i32 %57, i32 1 %60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %59, <32 x i8> %37, <16 x i8> %40, i32 2) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = fmul float %61, %27 %65 = fadd float %64, %53 %66 = fmul float %62, %28 %67 = fadd float %66, %54 %68 = fmul float %63, %29 %69 = fadd float %68, %55 %70 = call i32 @llvm.SI.packf16(float %65, float %67) %71 = bitcast i32 %70 to float %72 = call i32 @llvm.SI.packf16(float %69, float %52) %73 = bitcast i32 %72 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %71, float %73, float %71, float %73) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 s_buffer_load_dword s6, s[0:3], 0xa ; C203010A s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s32, s[0:3], 0xd ; C210010D s_buffer_load_dword s33, s[0:3], 0xe ; C210810E s_buffer_load_dword s0, s[0:3], 0x10 ; C2000110 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440402 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800700 00660002 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v3, s4, v4 ; 10060804 v_mul_f32_e32 v8, s5, v5 ; 10100A05 v_mul_f32_e32 v9, s6, v6 ; 10120C06 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v0, s7, v3 ; D2820003 040C0F00 v_mad_f32 v8, v1, s32, v8 ; D2820008 04204101 v_mad_f32 v0, v2, s33, v9 ; D2820000 04244302 v_mul_f32_e32 v1, s0, v7 ; 10020E00 v_cvt_pkrtz_f16_f32_e32 v2, v3, v8 ; 5E041103 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[5], IN[1].xxxx 2: MAD TEMP[1], CONST[6], IN[1].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[7], IN[1].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[8], IN[1].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: DP3 TEMP[1].x, CONST[4].xyzz, IN[2].xyzz 7: DP3 TEMP[2].x, CONST[4].xyzz, IN[3].xyzz 8: MOV TEMP[1].y, TEMP[2].xxxx 9: DP3 TEMP[2].x, CONST[4].xyzz, IN[4].xyzz 10: MOV TEMP[1].z, TEMP[2].xxxx 11: MUL TEMP[2], CONST[0], IN[0].xxxx 12: MAD TEMP[2], CONST[1], IN[0].yyyy, TEMP[2] 13: MAD TEMP[2], CONST[2], IN[0].zzzz, TEMP[2] 14: MAD TEMP[2], CONST[3], IN[0].wwww, TEMP[2] 15: MOV TEMP[1].xyz, TEMP[1].xyzx 16: MOV OUT[2], TEMP[1] 17: MOV OUT[0], TEMP[2] 18: MOV OUT[1], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = add i32 %5, %7 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = fmul float %32, %55 %81 = fmul float %33, %55 %82 = fmul float %34, %55 %83 = fmul float %35, %56 %84 = fadd float %83, %80 %85 = fmul float %36, %56 %86 = fadd float %85, %81 %87 = fmul float %37, %56 %88 = fadd float %87, %82 %89 = fmul float %38, %57 %90 = fadd float %89, %84 %91 = fmul float %39, %57 %92 = fadd float %91, %86 %93 = fmul float %40, %57 %94 = fadd float %93, %88 %95 = fmul float %41, %58 %96 = fadd float %95, %90 %97 = fmul float %42, %58 %98 = fadd float %97, %92 %99 = fmul float %29, %63 %100 = fmul float %30, %64 %101 = fadd float %100, %99 %102 = fmul float %31, %65 %103 = fadd float %101, %102 %104 = fmul float %29, %70 %105 = fmul float %30, %71 %106 = fadd float %105, %104 %107 = fmul float %31, %72 %108 = fadd float %106, %107 %109 = fmul float %29, %77 %110 = fmul float %30, %78 %111 = fadd float %110, %109 %112 = fmul float %31, %79 %113 = fadd float %111, %112 %114 = fmul float %13, %47 %115 = fmul float %14, %47 %116 = fmul float %15, %47 %117 = fmul float %16, %47 %118 = fmul float %17, %48 %119 = fadd float %118, %114 %120 = fmul float %18, %48 %121 = fadd float %120, %115 %122 = fmul float %19, %48 %123 = fadd float %122, %116 %124 = fmul float %20, %48 %125 = fadd float %124, %117 %126 = fmul float %21, %49 %127 = fadd float %126, %119 %128 = fmul float %22, %49 %129 = fadd float %128, %121 %130 = fmul float %23, %49 %131 = fadd float %130, %123 %132 = fmul float %24, %49 %133 = fadd float %132, %125 %134 = fmul float %25, %50 %135 = fadd float %134, %127 %136 = fmul float %26, %50 %137 = fadd float %136, %129 %138 = fmul float %27, %50 %139 = fadd float %138, %131 %140 = fmul float %28, %50 %141 = fadd float %140, %133 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %96, float %98, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %103, float %108, float %113, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %135, float %137, float %139, float %141) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[18:21], v0, s[8:11], 0 idxen ; E00C2000 80021200 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1b ; C206011B s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x1c ; C209011C s_buffer_load_dword s19, s[0:3], 0x1d ; C209811D s_buffer_load_dword s20, s[0:3], 0x1f ; C20A011F s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 s_buffer_load_dword s23, s[0:3], 0x5 ; C20B8105 s_buffer_load_dword s25, s[0:3], 0x6 ; C20C8106 s_buffer_load_dword s26, s[0:3], 0x7 ; C20D0107 s_buffer_load_dword s27, s[0:3], 0x8 ; C20D8108 s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109 s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A s_buffer_load_dword s30, s[0:3], 0xb ; C20F010B s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C s_buffer_load_dword s32, s[0:3], 0xd ; C210010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s13, v2 ; 1000040D v_mul_f32_e32 v22, s7, v6 ; 102C0C07 v_mul_f32_e32 v23, s4, v10 ; 102E1404 v_mad_f32 v23, s5, v11, v23 ; D2820017 045E1605 v_mad_f32 v10, s6, v12, v23 ; D282000A 045E1806 v_mul_f32_e32 v11, s4, v14 ; 10161C04 v_mad_f32 v11, s5, v15, v11 ; D282000B 042E1E05 v_mad_f32 v11, s6, v16, v11 ; D282000B 042E2006 v_mul_f32_e32 v12, s4, v18 ; 10182404 v_mad_f32 v12, s5, v19, v12 ; D282000C 04322605 v_mad_f32 v12, s6, v20, v12 ; D282000C 04322806 v_mad_f32 v13, s10, v7, v22 ; D282000D 045A0E0A v_mul_f32_e32 v14, s8, v6 ; 101C0C08 v_mad_f32 v14, s11, v7, v14 ; D282000E 043A0E0B v_mul_f32_e32 v15, s9, v6 ; 101E0C09 v_mad_f32 v15, s12, v7, v15 ; D282000F 043E0E0C v_mad_f32 v13, s18, v8, v13 ; D282000D 04361012 v_mad_f32 v14, s19, v8, v14 ; D282000E 043A1013 v_mad_f32 v15, s20, v8, v15 ; D282000F 043E1014 v_mad_f32 v13, s21, v9, v13 ; D282000D 04361215 v_mad_f32 v6, s22, v9, v14 ; D2820006 043A1216 v_mad_f32 v0, s17, v3, v0 ; D2820000 04020611 v_mul_f32_e32 v7, s14, v2 ; 100E040E v_mad_f32 v7, s23, v3, v7 ; D2820007 041E0617 v_mul_f32_e32 v8, s15, v2 ; 1010040F v_mad_f32 v8, s25, v3, v8 ; D2820008 04220619 v_mul_f32_e32 v9, s16, v2 ; 10120410 v_mad_f32 v9, s26, v3, v9 ; D2820009 0426061A v_mad_f32 v0, s27, v4, v0 ; D2820000 0402081B v_mad_f32 v7, s28, v4, v7 ; D2820007 041E081C v_mad_f32 v8, s29, v4, v8 ; D2820008 0422081D v_mad_f32 v9, s30, v4, v9 ; D2820009 0426081E v_mad_f32 v0, s31, v5, v0 ; D2820000 04020A1F v_mad_f32 v7, s32, v5, v7 ; D2820007 041E0A20 v_mad_f32 v8, s0, v5, v8 ; D2820008 04220A00 v_mad_f32 v2, s24, v5, v9 ; D2820002 04260A18 exp 15, 32, 0, 0, 0, v13, v6, v1, v1 ; F800020F 0101060D exp 15, 33, 0, 0, 0, v10, v11, v12, v15 ; F800021F 0F0C0B0A exp 15, 12, 0, 1, 0, v0, v7, v8, v2 ; F80008CF 02080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 24 Code Size: 476 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[2..5] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { -0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[4].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D 6: ADD TEMP[2].xyz, TEMP[2].xyzz, IMM[0].xxxx 7: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 8: RSQ TEMP[3].x, TEMP[3].xxxx 9: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 10: DP3 TEMP[3].x, IN[1].xyzz, IN[1].xyzz 11: RSQ TEMP[3].x, TEMP[3].xxxx 12: MUL TEMP[3].xyz, IN[1].xyzz, TEMP[3].xxxx 13: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[3].xyzz 14: MAX TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 15: MUL TEMP[2].xyz, CONST[3].xyzz, TEMP[2].xxxx 16: MAD TEMP[2].xyz, TEMP[2].xyzz, CONST[5].xyzz, CONST[2].xyzz 17: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 18: MOV OUT[0], TEMP[1] 19: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %35 = load <32 x i8>, <32 x i8> addrspace(2)* %34, align 32, !tbaa !0 %36 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %39 = bitcast <8 x i32> addrspace(2)* %38 to <32 x i8> addrspace(2)* %40 = load <32 x i8>, <32 x i8> addrspace(2)* %39, align 32, !tbaa !0 %41 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %42 = bitcast <4 x i32> addrspace(2)* %41 to <16 x i8> addrspace(2)* %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 %44 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %49 = bitcast float %44 to i32 %50 = bitcast float %45 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %40, <16 x i8> %43, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul float %57, %30 %59 = bitcast float %44 to i32 %60 = bitcast float %45 to i32 %61 = insertelement <2 x i32> undef, i32 %59, i32 0 %62 = insertelement <2 x i32> %61, i32 %60, i32 1 %63 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %62, <32 x i8> %35, <16 x i8> %37, i32 2) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = extractelement <4 x float> %63, i32 2 %67 = fadd float %64, -5.000000e-01 %68 = fadd float %65, -5.000000e-01 %69 = fadd float %66, -5.000000e-01 %70 = fmul float %67, %67 %71 = fmul float %68, %68 %72 = fadd float %71, %70 %73 = fmul float %69, %69 %74 = fadd float %72, %73 %75 = call float @llvm.AMDGPU.rsq.clamped.f32(float %74) %76 = fmul float %67, %75 %77 = fmul float %68, %75 %78 = fmul float %69, %75 %79 = fmul float %46, %46 %80 = fmul float %47, %47 %81 = fadd float %80, %79 %82 = fmul float %48, %48 %83 = fadd float %81, %82 %84 = call float @llvm.AMDGPU.rsq.clamped.f32(float %83) %85 = fmul float %46, %84 %86 = fmul float %47, %84 %87 = fmul float %48, %84 %88 = fmul float %76, %85 %89 = fmul float %77, %86 %90 = fadd float %89, %88 %91 = fmul float %78, %87 %92 = fadd float %90, %91 %93 = call float @llvm.maxnum.f32(float %92, float 0.000000e+00) %94 = fmul float %27, %93 %95 = fmul float %28, %93 %96 = fmul float %29, %93 %97 = fmul float %94, %31 %98 = fadd float %97, %24 %99 = fmul float %95, %32 %100 = fadd float %99, %25 %101 = fmul float %96, %33 %102 = fadd float %101, %26 %103 = fmul float %54, %98 %104 = fmul float %55, %100 %105 = fmul float %56, %102 %106 = call i32 @llvm.SI.packf16(float %103, float %104) %107 = bitcast i32 %106 to float %108 = call i32 @llvm.SI.packf16(float %105, float %58) %109 = bitcast i32 %108 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %107, float %109, float %107, float %109) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800F00 00660702 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800700 00440002 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v3, -0.5, v0 ; 060600F1 v_add_f32_e32 v11, -0.5, v1 ; 061602F1 v_add_f32_e32 v0, -0.5, v2 ; 060004F1 v_mul_f32_e32 v1, v3, v3 ; 10020703 v_mad_f32 v1, v11, v11, v1 ; D2820001 0406170B v_mad_f32 v1, v0, v0, v1 ; D2820001 04060100 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v2, v4, v4 ; 10040904 v_mad_f32 v2, v5, v5, v2 ; D2820002 040A0B05 v_mad_f32 v2, v6, v6, v2 ; D2820002 040A0D06 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 s_buffer_load_dword s6, s[0:3], 0xa ; C203010A s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s8, s[0:3], 0xd ; C204010D s_buffer_load_dword s9, s[0:3], 0xe ; C204810E s_buffer_load_dword s10, s[0:3], 0x10 ; C2050110 s_buffer_load_dword s11, s[0:3], 0x14 ; C2058114 s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115 s_buffer_load_dword s0, s[0:3], 0x16 ; C2000116 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v4, v2, v4 ; 10080902 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mul_f32_e32 v4, v1, v11 ; 10081701 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 v_mad_f32 v3, v4, v5, v3 ; D2820003 040E0B04 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v1, v2, v6 ; 10020D02 v_mad_f32 v0, v0, v1, v3 ; D2820000 040E0300 v_max_f32_e32 v0, 0, v0 ; 20000080 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s7, v0 ; 10020007 v_mul_f32_e32 v2, s8, v0 ; 10040008 v_mul_f32_e32 v0, s9, v0 ; 10000009 v_mul_f32_e32 v3, s10, v10 ; 1006140A v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v1, s11, v1, v4 ; D2820001 0412020B v_mov_b32_e32 v4, s5 ; 7E080205 v_mad_f32 v2, s12, v2, v4 ; D2820002 0412040C v_mov_b32_e32 v4, s6 ; 7E080206 v_mad_f32 v0, s0, v0, v4 ; D2820000 04120000 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mul_f32_e32 v2, v2, v8 ; 10041102 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 328 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[5], IN[1].xxxx 2: MAD TEMP[1], CONST[6], IN[1].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[7], IN[1].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[8], IN[1].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: DP3 TEMP[1].x, CONST[4].xyzz, IN[2].xyzz 7: DP3 TEMP[2].x, CONST[4].xyzz, IN[3].xyzz 8: MOV TEMP[1].y, TEMP[2].xxxx 9: DP3 TEMP[2].x, CONST[4].xyzz, IN[4].xyzz 10: MOV TEMP[1].z, TEMP[2].xxxx 11: MUL TEMP[2], CONST[0], IN[0].xxxx 12: MAD TEMP[2], CONST[1], IN[0].yyyy, TEMP[2] 13: MAD TEMP[2], CONST[2], IN[0].zzzz, TEMP[2] 14: MAD TEMP[2], CONST[3], IN[0].wwww, TEMP[2] 15: MOV TEMP[1].xyz, TEMP[1].xyzx 16: MOV OUT[2], TEMP[1] 17: MOV OUT[0], TEMP[2] 18: MOV OUT[1], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = add i32 %5, %7 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = fmul float %32, %55 %81 = fmul float %33, %55 %82 = fmul float %34, %55 %83 = fmul float %35, %56 %84 = fadd float %83, %80 %85 = fmul float %36, %56 %86 = fadd float %85, %81 %87 = fmul float %37, %56 %88 = fadd float %87, %82 %89 = fmul float %38, %57 %90 = fadd float %89, %84 %91 = fmul float %39, %57 %92 = fadd float %91, %86 %93 = fmul float %40, %57 %94 = fadd float %93, %88 %95 = fmul float %41, %58 %96 = fadd float %95, %90 %97 = fmul float %42, %58 %98 = fadd float %97, %92 %99 = fmul float %29, %63 %100 = fmul float %30, %64 %101 = fadd float %100, %99 %102 = fmul float %31, %65 %103 = fadd float %101, %102 %104 = fmul float %29, %70 %105 = fmul float %30, %71 %106 = fadd float %105, %104 %107 = fmul float %31, %72 %108 = fadd float %106, %107 %109 = fmul float %29, %77 %110 = fmul float %30, %78 %111 = fadd float %110, %109 %112 = fmul float %31, %79 %113 = fadd float %111, %112 %114 = fmul float %13, %47 %115 = fmul float %14, %47 %116 = fmul float %15, %47 %117 = fmul float %16, %47 %118 = fmul float %17, %48 %119 = fadd float %118, %114 %120 = fmul float %18, %48 %121 = fadd float %120, %115 %122 = fmul float %19, %48 %123 = fadd float %122, %116 %124 = fmul float %20, %48 %125 = fadd float %124, %117 %126 = fmul float %21, %49 %127 = fadd float %126, %119 %128 = fmul float %22, %49 %129 = fadd float %128, %121 %130 = fmul float %23, %49 %131 = fadd float %130, %123 %132 = fmul float %24, %49 %133 = fadd float %132, %125 %134 = fmul float %25, %50 %135 = fadd float %134, %127 %136 = fmul float %26, %50 %137 = fadd float %136, %129 %138 = fmul float %27, %50 %139 = fadd float %138, %131 %140 = fmul float %28, %50 %141 = fadd float %140, %133 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %96, float %98, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %103, float %108, float %113, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %135, float %137, float %139, float %141) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[18:21], v0, s[8:11], 0 idxen ; E00C2000 80021200 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1b ; C206011B s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x1c ; C209011C s_buffer_load_dword s19, s[0:3], 0x1d ; C209811D s_buffer_load_dword s20, s[0:3], 0x1f ; C20A011F s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 s_buffer_load_dword s23, s[0:3], 0x5 ; C20B8105 s_buffer_load_dword s25, s[0:3], 0x6 ; C20C8106 s_buffer_load_dword s26, s[0:3], 0x7 ; C20D0107 s_buffer_load_dword s27, s[0:3], 0x8 ; C20D8108 s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109 s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A s_buffer_load_dword s30, s[0:3], 0xb ; C20F010B s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C s_buffer_load_dword s32, s[0:3], 0xd ; C210010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s13, v2 ; 1000040D v_mul_f32_e32 v22, s7, v6 ; 102C0C07 v_mul_f32_e32 v23, s4, v10 ; 102E1404 v_mad_f32 v23, s5, v11, v23 ; D2820017 045E1605 v_mad_f32 v10, s6, v12, v23 ; D282000A 045E1806 v_mul_f32_e32 v11, s4, v14 ; 10161C04 v_mad_f32 v11, s5, v15, v11 ; D282000B 042E1E05 v_mad_f32 v11, s6, v16, v11 ; D282000B 042E2006 v_mul_f32_e32 v12, s4, v18 ; 10182404 v_mad_f32 v12, s5, v19, v12 ; D282000C 04322605 v_mad_f32 v12, s6, v20, v12 ; D282000C 04322806 v_mad_f32 v13, s10, v7, v22 ; D282000D 045A0E0A v_mul_f32_e32 v14, s8, v6 ; 101C0C08 v_mad_f32 v14, s11, v7, v14 ; D282000E 043A0E0B v_mul_f32_e32 v15, s9, v6 ; 101E0C09 v_mad_f32 v15, s12, v7, v15 ; D282000F 043E0E0C v_mad_f32 v13, s18, v8, v13 ; D282000D 04361012 v_mad_f32 v14, s19, v8, v14 ; D282000E 043A1013 v_mad_f32 v15, s20, v8, v15 ; D282000F 043E1014 v_mad_f32 v13, s21, v9, v13 ; D282000D 04361215 v_mad_f32 v6, s22, v9, v14 ; D2820006 043A1216 v_mad_f32 v0, s17, v3, v0 ; D2820000 04020611 v_mul_f32_e32 v7, s14, v2 ; 100E040E v_mad_f32 v7, s23, v3, v7 ; D2820007 041E0617 v_mul_f32_e32 v8, s15, v2 ; 1010040F v_mad_f32 v8, s25, v3, v8 ; D2820008 04220619 v_mul_f32_e32 v9, s16, v2 ; 10120410 v_mad_f32 v9, s26, v3, v9 ; D2820009 0426061A v_mad_f32 v0, s27, v4, v0 ; D2820000 0402081B v_mad_f32 v7, s28, v4, v7 ; D2820007 041E081C v_mad_f32 v8, s29, v4, v8 ; D2820008 0422081D v_mad_f32 v9, s30, v4, v9 ; D2820009 0426081E v_mad_f32 v0, s31, v5, v0 ; D2820000 04020A1F v_mad_f32 v7, s32, v5, v7 ; D2820007 041E0A20 v_mad_f32 v8, s0, v5, v8 ; D2820008 04220A00 v_mad_f32 v2, s24, v5, v9 ; D2820002 04260A18 exp 15, 32, 0, 0, 0, v13, v6, v1, v1 ; F800020F 0101060D exp 15, 33, 0, 0, 0, v10, v11, v12, v15 ; F800021F 0F0C0B0A exp 15, 12, 0, 1, 0, v0, v7, v8, v2 ; F80008CF 02080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 24 Code Size: 476 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[5..11] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { -0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[10].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[4], 2D 6: MOV TEMP[3].xy, IN[0].xyyy 7: TEX TEMP[3].xyz, TEMP[3], SAMP[3], 2D 8: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[5].xyzz 9: MAD TEMP[2].xyz, TEMP[2].xyzz, CONST[6].xyzz, TEMP[3].xyzz 10: ADD TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 11: MOV TEMP[0].xy, IN[0].xyyy 12: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 13: ADD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx 14: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 15: RSQ TEMP[2].x, TEMP[2].xxxx 16: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 17: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz 18: RSQ TEMP[2].x, TEMP[2].xxxx 19: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx 20: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[2].xyzz 21: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy 22: MUL TEMP[0].xyz, CONST[8].xyzz, TEMP[0].xxxx 23: MAD TEMP[0].xyz, TEMP[0].xyzz, CONST[11].xyzz, CONST[7].xyzz 24: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[0].xyzz 25: MOV TEMP[0].xy, IN[0].xyyy 26: TEX TEMP[0].xyz, TEMP[0], SAMP[2], 2D 27: MAD TEMP[1].xyz, TEMP[0].xyzz, CONST[9].xyzz, TEMP[1].xyzz 28: MOV OUT[0], TEMP[1] 29: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %43 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %48 = bitcast <8 x i32> addrspace(2)* %47 to <32 x i8> addrspace(2)* %49 = load <32 x i8>, <32 x i8> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %51 = bitcast <4 x i32> addrspace(2)* %50 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %76 = bitcast float %71 to i32 %77 = bitcast float %72 to i32 %78 = insertelement <2 x i32> undef, i32 %76, i32 0 %79 = insertelement <2 x i32> %78, i32 %77, i32 1 %80 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %79, <32 x i8> %49, <16 x i8> %52, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 1 %83 = extractelement <4 x float> %80, i32 2 %84 = extractelement <4 x float> %80, i32 3 %85 = fmul float %84, %39 %86 = bitcast float %71 to i32 %87 = bitcast float %72 to i32 %88 = insertelement <2 x i32> undef, i32 %86, i32 0 %89 = insertelement <2 x i32> %88, i32 %87, i32 1 %90 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %67, <16 x i8> %70, i32 2) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = extractelement <4 x float> %90, i32 2 %94 = bitcast float %71 to i32 %95 = bitcast float %72 to i32 %96 = insertelement <2 x i32> undef, i32 %94, i32 0 %97 = insertelement <2 x i32> %96, i32 %95, i32 1 %98 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %61, <16 x i8> %64, i32 2) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = extractelement <4 x float> %98, i32 2 %102 = fmul float %99, %24 %103 = fmul float %100, %25 %104 = fmul float %101, %26 %105 = fmul float %91, %27 %106 = fadd float %105, %102 %107 = fmul float %92, %28 %108 = fadd float %107, %103 %109 = fmul float %93, %29 %110 = fadd float %109, %104 %111 = fadd float %81, %106 %112 = fadd float %82, %108 %113 = fadd float %83, %110 %114 = bitcast float %71 to i32 %115 = bitcast float %72 to i32 %116 = insertelement <2 x i32> undef, i32 %114, i32 0 %117 = insertelement <2 x i32> %116, i32 %115, i32 1 %118 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %117, <32 x i8> %44, <16 x i8> %46, i32 2) %119 = extractelement <4 x float> %118, i32 0 %120 = extractelement <4 x float> %118, i32 1 %121 = extractelement <4 x float> %118, i32 2 %122 = fadd float %119, -5.000000e-01 %123 = fadd float %120, -5.000000e-01 %124 = fadd float %121, -5.000000e-01 %125 = fmul float %122, %122 %126 = fmul float %123, %123 %127 = fadd float %126, %125 %128 = fmul float %124, %124 %129 = fadd float %127, %128 %130 = call float @llvm.AMDGPU.rsq.clamped.f32(float %129) %131 = fmul float %122, %130 %132 = fmul float %123, %130 %133 = fmul float %124, %130 %134 = fmul float %73, %73 %135 = fmul float %74, %74 %136 = fadd float %135, %134 %137 = fmul float %75, %75 %138 = fadd float %136, %137 %139 = call float @llvm.AMDGPU.rsq.clamped.f32(float %138) %140 = fmul float %73, %139 %141 = fmul float %74, %139 %142 = fmul float %75, %139 %143 = fmul float %131, %140 %144 = fmul float %132, %141 %145 = fadd float %144, %143 %146 = fmul float %133, %142 %147 = fadd float %145, %146 %148 = call float @llvm.maxnum.f32(float %147, float 0.000000e+00) %149 = fmul float %33, %148 %150 = fmul float %34, %148 %151 = fmul float %35, %148 %152 = fmul float %149, %40 %153 = fadd float %152, %30 %154 = fmul float %150, %41 %155 = fadd float %154, %31 %156 = fmul float %151, %42 %157 = fadd float %156, %32 %158 = fmul float %111, %153 %159 = fmul float %112, %155 %160 = fmul float %113, %157 %161 = bitcast float %71 to i32 %162 = bitcast float %72 to i32 %163 = insertelement <2 x i32> undef, i32 %161, i32 0 %164 = insertelement <2 x i32> %163, i32 %162, i32 1 %165 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %164, <32 x i8> %55, <16 x i8> %58, i32 2) %166 = extractelement <4 x float> %165, i32 0 %167 = extractelement <4 x float> %165, i32 1 %168 = extractelement <4 x float> %165, i32 2 %169 = fmul float %166, %36 %170 = fadd float %169, %158 %171 = fmul float %167, %37 %172 = fadd float %171, %159 %173 = fmul float %168, %38 %174 = fadd float %173, %160 %175 = call i32 @llvm.SI.packf16(float %170, float %172) %176 = bitcast i32 %175 to float %177 = call i32 @llvm.SI.packf16(float %174, float %85) %178 = bitcast i32 %177 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %176, float %178, float %176, float %178) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[20:23], s[4:5], 0x10 ; C08A0510 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_buffer_load_dword s36, s[0:3], 0x16 ; C2120116 s_buffer_load_dword s37, s[0:3], 0x18 ; C2128118 s_buffer_load_dword s38, s[0:3], 0x19 ; C2130119 s_buffer_load_dword s39, s[0:3], 0x1a ; C213811A s_buffer_load_dword s40, s[0:3], 0x1c ; C214011C s_buffer_load_dword s41, s[0:3], 0x1d ; C214811D s_buffer_load_dword s42, s[0:3], 0x1e ; C215011E s_buffer_load_dword s43, s[0:3], 0x20 ; C2158120 s_buffer_load_dword s44, s[0:3], 0x28 ; C2160128 s_buffer_load_dword s45, s[0:3], 0x2c ; C216812C s_buffer_load_dword s46, s[0:3], 0x2d ; C217012D s_buffer_load_dword s47, s[0:3], 0x2e ; C217812E s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx8 s[48:55], s[6:7], 0x8 ; C0D80708 s_load_dwordx8 s[56:63], s[6:7], 0x10 ; C0DC0710 s_load_dwordx8 s[64:71], s[6:7], 0x18 ; C0E00718 s_load_dwordx8 s[72:79], s[6:7], 0x20 ; C0E40720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[24:27] ; F0800F00 00CC0702 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, s44, v10 ; 1000142C image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[72:79], s[20:23] ; F0800700 00B20B02 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[64:71], s[16:19] ; F0800700 00900E02 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[8:11] ; F0800700 00471102 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[56:63], s[12:15] ; F0800700 006E0102 s_waitcnt vmcnt(2) ; BF8C0772 v_mad_f32 v20, v14, s4, v7 ; D2820014 041C090E v_mad_f32 v21, v15, s5, v8 ; D2820015 04200B0F v_mad_f32 v7, v16, s36, v9 ; D2820007 04244910 v_mad_f32 v8, v11, s37, v20 ; D2820008 04504B0B v_mad_f32 v9, v12, s38, v21 ; D2820009 04544D0C v_mad_f32 v7, v13, s39, v7 ; D2820007 041C4F0D s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v10, -0.5, v17 ; 061422F1 v_add_f32_e32 v11, -0.5, v18 ; 061624F1 v_add_f32_e32 v12, -0.5, v19 ; 061826F1 v_mul_f32_e32 v13, v10, v10 ; 101A150A v_mad_f32 v13, v11, v11, v13 ; D282000D 0436170B v_mad_f32 v13, v12, v12, v13 ; D282000D 0436190C v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v14, v4, v4 ; 101C0904 v_mad_f32 v14, v5, v5, v14 ; D282000E 043A0B05 v_mad_f32 v14, v6, v6, v14 ; D282000E 043A0D06 v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_buffer_load_dword s5, s[0:3], 0x22 ; C2028122 s_buffer_load_dword s6, s[0:3], 0x24 ; C2030124 s_buffer_load_dword s7, s[0:3], 0x25 ; C2038125 s_buffer_load_dword s0, s[0:3], 0x26 ; C2000126 v_mul_f32_e32 v10, v13, v10 ; 1014150D v_mul_f32_e32 v4, v14, v4 ; 1008090E v_mul_f32_e32 v4, v4, v10 ; 10081504 v_mul_f32_e32 v10, v13, v11 ; 1014170D v_mul_f32_e32 v5, v14, v5 ; 100A0B0E v_mad_f32 v4, v10, v5, v4 ; D2820004 04120B0A v_mul_f32_e32 v5, v13, v12 ; 100A190D v_mul_f32_e32 v6, v14, v6 ; 100C0D0E v_mad_f32 v4, v5, v6, v4 ; D2820004 04120D05 v_max_f32_e32 v4, 0, v4 ; 20080880 v_mul_f32_e32 v5, s43, v4 ; 100A082B v_mov_b32_e32 v6, s40 ; 7E0C0228 v_mad_f32 v5, s45, v5, v6 ; D2820005 041A0A2D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v6, s4, v4 ; 100C0804 v_mov_b32_e32 v10, s41 ; 7E140229 v_mad_f32 v6, s46, v6, v10 ; D2820006 042A0C2E v_mul_f32_e32 v4, s5, v4 ; 10080805 v_mov_b32_e32 v10, s42 ; 7E14022A v_mad_f32 v4, s47, v4, v10 ; D2820004 042A082F v_mul_f32_e32 v5, v5, v8 ; 100A1105 v_mul_f32_e32 v6, v6, v9 ; 100C1306 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mad_f32 v5, v1, s6, v5 ; D2820005 04140D01 v_mad_f32 v6, v2, s7, v6 ; D2820006 04180F02 v_mad_f32 v1, v3, s0, v4 ; D2820001 04100103 v_cvt_pkrtz_f16_f32_e32 v2, v5, v6 ; 5E040D05 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 24 Code Size: 496 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL CONST[0..9] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[6], IN[1].xxxx 2: MAD TEMP[1], CONST[7], IN[1].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[8], IN[1].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[9], IN[1].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: DP3 TEMP[1].x, CONST[5].xyzz, IN[2].xyzz 7: DP3 TEMP[2].x, CONST[5].xyzz, IN[3].xyzz 8: MOV TEMP[1].y, TEMP[2].xxxx 9: DP3 TEMP[2].x, CONST[5].xyzz, IN[4].xyzz 10: MOV TEMP[1].z, TEMP[2].xxxx 11: ADD TEMP[2].xyz, CONST[4].xyzz, -IN[0].xyzz 12: DP3 TEMP[3].x, TEMP[2].xyzz, IN[2].xyzz 13: DP3 TEMP[4].x, TEMP[2].xyzz, IN[3].xyzz 14: MOV TEMP[3].y, TEMP[4].xxxx 15: DP3 TEMP[2].x, TEMP[2].xyzz, IN[4].xyzz 16: MOV TEMP[3].z, TEMP[2].xxxx 17: MOV TEMP[3].w, IMM[0].xxxx 18: MOV TEMP[2].w, IMM[0].xxxx 19: MOV TEMP[2].xyz, IN[2].xyzx 20: MOV TEMP[4].w, IMM[0].xxxx 21: MOV TEMP[4].xyz, IN[3].xyzx 22: MOV TEMP[5].w, IMM[0].xxxx 23: MOV TEMP[5].xyz, IN[4].xyzx 24: MUL TEMP[6], CONST[0], IN[0].xxxx 25: MAD TEMP[6], CONST[1], IN[0].yyyy, TEMP[6] 26: MAD TEMP[6], CONST[2], IN[0].zzzz, TEMP[6] 27: MAD TEMP[6], CONST[3], IN[0].wwww, TEMP[6] 28: MOV TEMP[1].xyz, TEMP[1].xyzx 29: MOV OUT[6], TEMP[1] 30: MOV OUT[3], TEMP[5] 31: MOV OUT[4], TEMP[2] 32: MOV OUT[0], TEMP[6] 33: MOV OUT[5], TEMP[4] 34: MOV OUT[1], TEMP[3] 35: MOV OUT[2], TEMP[0] 36: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = add i32 %5, %7 %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %78) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = fmul float %35, %58 %84 = fmul float %36, %58 %85 = fmul float %37, %58 %86 = fmul float %38, %59 %87 = fadd float %86, %83 %88 = fmul float %39, %59 %89 = fadd float %88, %84 %90 = fmul float %40, %59 %91 = fadd float %90, %85 %92 = fmul float %41, %60 %93 = fadd float %92, %87 %94 = fmul float %42, %60 %95 = fadd float %94, %89 %96 = fmul float %43, %60 %97 = fadd float %96, %91 %98 = fmul float %44, %61 %99 = fadd float %98, %93 %100 = fmul float %45, %61 %101 = fadd float %100, %95 %102 = fmul float %32, %66 %103 = fmul float %33, %67 %104 = fadd float %103, %102 %105 = fmul float %34, %68 %106 = fadd float %104, %105 %107 = fmul float %32, %73 %108 = fmul float %33, %74 %109 = fadd float %108, %107 %110 = fmul float %34, %75 %111 = fadd float %109, %110 %112 = fmul float %32, %80 %113 = fmul float %33, %81 %114 = fadd float %113, %112 %115 = fmul float %34, %82 %116 = fadd float %114, %115 %117 = fsub float %29, %50 %118 = fsub float %30, %51 %119 = fsub float %31, %52 %120 = fmul float %117, %66 %121 = fmul float %118, %67 %122 = fadd float %121, %120 %123 = fmul float %119, %68 %124 = fadd float %122, %123 %125 = fmul float %117, %73 %126 = fmul float %118, %74 %127 = fadd float %126, %125 %128 = fmul float %119, %75 %129 = fadd float %127, %128 %130 = fmul float %117, %80 %131 = fmul float %118, %81 %132 = fadd float %131, %130 %133 = fmul float %119, %82 %134 = fadd float %132, %133 %135 = fmul float %13, %50 %136 = fmul float %14, %50 %137 = fmul float %15, %50 %138 = fmul float %16, %50 %139 = fmul float %17, %51 %140 = fadd float %139, %135 %141 = fmul float %18, %51 %142 = fadd float %141, %136 %143 = fmul float %19, %51 %144 = fadd float %143, %137 %145 = fmul float %20, %51 %146 = fadd float %145, %138 %147 = fmul float %21, %52 %148 = fadd float %147, %140 %149 = fmul float %22, %52 %150 = fadd float %149, %142 %151 = fmul float %23, %52 %152 = fadd float %151, %144 %153 = fmul float %24, %52 %154 = fadd float %153, %146 %155 = fmul float %25, %53 %156 = fadd float %155, %148 %157 = fmul float %26, %53 %158 = fadd float %157, %150 %159 = fmul float %27, %53 %160 = fadd float %159, %152 %161 = fmul float %28, %53 %162 = fadd float %161, %154 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %124, float %129, float %134, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %99, float %101, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %80, float %81, float %82, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %66, float %67, float %68, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %73, float %74, float %75, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %106, float %111, float %116, float %97) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %156, float %158, float %160, float %162) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[18:21], v0, s[8:11], 0 idxen ; E00C2000 80021200 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1b ; C206011B s_buffer_load_dword s13, s[0:3], 0x1c ; C206811C s_buffer_load_dword s14, s[0:3], 0x1d ; C207011D s_buffer_load_dword s15, s[0:3], 0x1f ; C207811F s_buffer_load_dword s16, s[0:3], 0x20 ; C2080120 s_buffer_load_dword s17, s[0:3], 0x21 ; C2088121 s_buffer_load_dword s18, s[0:3], 0x23 ; C2090123 s_buffer_load_dword s19, s[0:3], 0x24 ; C2098124 s_buffer_load_dword s20, s[0:3], 0x25 ; C20A0125 s_buffer_load_dword s21, s[0:3], 0x0 ; C20A8100 s_buffer_load_dword s22, s[0:3], 0x1 ; C20B0101 s_buffer_load_dword s23, s[0:3], 0x2 ; C20B8102 s_buffer_load_dword s25, s[0:3], 0x3 ; C20C8103 s_buffer_load_dword s26, s[0:3], 0x4 ; C20D0104 s_buffer_load_dword s27, s[0:3], 0x5 ; C20D8105 s_buffer_load_dword s28, s[0:3], 0x6 ; C20E0106 s_buffer_load_dword s29, s[0:3], 0x7 ; C20E8107 s_buffer_load_dword s30, s[0:3], 0x8 ; C20F0108 s_buffer_load_dword s31, s[0:3], 0x9 ; C20F8109 s_buffer_load_dword s32, s[0:3], 0xa ; C210010A s_buffer_load_dword s33, s[0:3], 0xb ; C210810B s_buffer_load_dword s34, s[0:3], 0xc ; C211010C s_buffer_load_dword s35, s[0:3], 0xd ; C211810D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s21, v2 ; 10000415 v_mul_f32_e32 v22, s10, v6 ; 102C0C0A v_mad_f32 v22, s13, v7, v22 ; D2820016 045A0E0D v_mul_f32_e32 v23, s11, v6 ; 102E0C0B v_mad_f32 v23, s14, v7, v23 ; D2820017 045E0E0E v_mul_f32_e32 v24, s12, v6 ; 10300C0C v_mad_f32 v24, s15, v7, v24 ; D2820018 04620E0F v_mad_f32 v22, s16, v8, v22 ; D2820016 045A1010 v_mad_f32 v23, s17, v8, v23 ; D2820017 045E1011 v_mad_f32 v24, s18, v8, v24 ; D2820018 04621012 v_mad_f32 v22, s19, v9, v22 ; D2820016 045A1213 v_mad_f32 v6, s20, v9, v23 ; D2820006 045E1214 v_mad_f32 v0, s26, v3, v0 ; D2820000 0402061A v_mul_f32_e32 v7, s22, v2 ; 100E0416 v_mad_f32 v7, s27, v3, v7 ; D2820007 041E061B v_mul_f32_e32 v8, s23, v2 ; 10100417 v_mad_f32 v8, s28, v3, v8 ; D2820008 0422061C v_mul_f32_e32 v9, s25, v2 ; 10120419 v_mad_f32 v9, s29, v3, v9 ; D2820009 0426061D v_mad_f32 v0, s30, v4, v0 ; D2820000 0402081E v_mad_f32 v7, s31, v4, v7 ; D2820007 041E081F v_mad_f32 v8, s32, v4, v8 ; D2820008 04220820 v_mad_f32 v9, s33, v4, v9 ; D2820009 04260821 v_mad_f32 v0, s34, v5, v0 ; D2820000 04020A22 v_mad_f32 v7, s35, v5, v7 ; D2820007 041E0A23 v_mad_f32 v8, s0, v5, v8 ; D2820008 04220A00 v_mad_f32 v9, s24, v5, v9 ; D2820009 04260A18 v_sub_f32_e32 v23, s4, v2 ; 082E0404 v_sub_f32_e32 v25, s5, v3 ; 08320605 v_sub_f32_e32 v2, s6, v4 ; 08040806 v_mul_f32_e32 v3, v10, v23 ; 10062F0A v_mul_f32_e32 v4, v14, v23 ; 10082F0E v_mul_f32_e32 v5, v18, v23 ; 100A2F12 v_mad_f32 v3, v25, v11, v3 ; D2820003 040E1719 v_mad_f32 v4, v25, v15, v4 ; D2820004 04121F19 v_mad_f32 v5, v25, v19, v5 ; D2820005 04162719 v_mad_f32 v3, v2, v12, v3 ; D2820003 040E1902 v_mad_f32 v4, v2, v16, v4 ; D2820004 04122102 v_mad_f32 v2, v2, v20, v5 ; D2820002 04162902 exp 15, 32, 0, 0, 0, v3, v4, v2, v1 ; F800020F 01020403 exp 15, 33, 0, 0, 0, v22, v6, v1, v1 ; F800021F 01010616 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s7, v18 ; 10042407 v_mad_f32 v2, s8, v19, v2 ; D2820002 040A2608 v_mad_f32 v2, s9, v20, v2 ; D2820002 040A2809 exp 15, 34, 0, 0, 0, v18, v19, v20, v1 ; F800022F 01141312 v_mul_f32_e32 v3, s7, v10 ; 10061407 v_mul_f32_e32 v4, s7, v14 ; 10081C07 v_mad_f32 v3, s8, v11, v3 ; D2820003 040E1608 v_mad_f32 v4, s8, v15, v4 ; D2820004 04121E08 v_mad_f32 v3, s9, v12, v3 ; D2820003 040E1809 v_mad_f32 v4, s9, v16, v4 ; D2820004 04122009 exp 15, 35, 0, 0, 0, v10, v11, v12, v1 ; F800023F 010C0B0A exp 15, 36, 0, 0, 0, v14, v15, v16, v1 ; F800024F 01100F0E exp 15, 37, 0, 0, 0, v3, v4, v2, v24 ; F800025F 18020403 exp 15, 12, 0, 1, 0, v0, v7, v8, v9 ; F80008CF 09080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 28 Code Size: 596 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[4..12] DCL CONST[15] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { -0.5000, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[8].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MOV TEMP[2].xy, IN[1].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D 6: MOV TEMP[3].xy, IN[1].xyyy 7: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D 8: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[4].xyzz 9: MAD TEMP[2].xyz, TEMP[2].xyzz, CONST[5].xyzz, TEMP[3].xyzz 10: ADD TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 11: MOV TEMP[0].xy, IN[1].xyyy 12: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 13: ADD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx 14: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 15: RSQ TEMP[2].x, TEMP[2].xxxx 16: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 17: MOV TEMP[2].xyz, -IN[0].xyzx 18: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[2].xyzz 19: MUL TEMP[3].xyz, TEMP[3].xxxx, TEMP[0].xyzz 20: MUL TEMP[3].xyz, IMM[0].yyyy, TEMP[3].xyzz 21: ADD TEMP[2].xyz, TEMP[2].xyzz, -TEMP[3].xyzz 22: MUL TEMP[3].xyz, TEMP[2].xxxx, IN[3].xyzz 23: MAD TEMP[3].xyz, TEMP[2].yyyy, IN[4].xyzz, TEMP[3].xyzz 24: MAD TEMP[2].xyz, TEMP[2].zzzz, IN[2].xyzz, TEMP[3].xyzz 25: MOV TEMP[3].xy, IN[1].xyyy 26: TEX TEMP[3].xyz, TEMP[3], SAMP[4], 2D 27: MUL TEMP[4], CONST[9], TEMP[2].xxxx 28: MAD TEMP[4], CONST[10], TEMP[2].yyyy, TEMP[4] 29: MAD TEMP[2].xyz, CONST[11], TEMP[2].zzzz, TEMP[4] 30: MOV TEMP[2].xyz, TEMP[2].xyzz 31: TEX TEMP[2].xyz, TEMP[2], SAMP[5], CUBE 32: MAD TEMP[2].xyz, TEMP[3].xyzz, TEMP[2].xyzz, TEMP[1].xyzz 33: DP3 TEMP[3].x, IN[5].xyzz, IN[5].xyzz 34: RSQ TEMP[3].x, TEMP[3].xxxx 35: MUL TEMP[3].xyz, IN[5].xyzz, TEMP[3].xxxx 36: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[3].xyzz 37: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz 38: MUL TEMP[0].xyz, CONST[7].xyzz, TEMP[0].xxxx 39: MAD TEMP[0].xyz, TEMP[0].xyzz, CONST[15].xyzz, CONST[6].xyzz 40: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[0].xyzz 41: MOV OUT[0], TEMP[1] 42: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %49 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %72 = bitcast <8 x i32> addrspace(2)* %71 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %75 = bitcast <4 x i32> addrspace(2)* %74 to <16 x i8> addrspace(2)* %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %78 = bitcast <8 x i32> addrspace(2)* %77 to <32 x i8> addrspace(2)* %79 = load <32 x i8>, <32 x i8> addrspace(2)* %78, align 32, !tbaa !0 %80 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %81 = bitcast <4 x i32> addrspace(2)* %80 to <16 x i8> addrspace(2)* %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %100 = bitcast float %86 to i32 %101 = bitcast float %87 to i32 %102 = insertelement <2 x i32> undef, i32 %100, i32 0 %103 = insertelement <2 x i32> %102, i32 %101, i32 1 %104 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %103, <32 x i8> %55, <16 x i8> %58, i32 2) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = extractelement <4 x float> %104, i32 2 %108 = extractelement <4 x float> %104, i32 3 %109 = fmul float %108, %36 %110 = bitcast float %86 to i32 %111 = bitcast float %87 to i32 %112 = insertelement <2 x i32> undef, i32 %110, i32 0 %113 = insertelement <2 x i32> %112, i32 %111, i32 1 %114 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %67, <16 x i8> %70, i32 2) %115 = extractelement <4 x float> %114, i32 0 %116 = extractelement <4 x float> %114, i32 1 %117 = extractelement <4 x float> %114, i32 2 %118 = bitcast float %86 to i32 %119 = bitcast float %87 to i32 %120 = insertelement <2 x i32> undef, i32 %118, i32 0 %121 = insertelement <2 x i32> %120, i32 %119, i32 1 %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %61, <16 x i8> %64, i32 2) %123 = extractelement <4 x float> %122, i32 0 %124 = extractelement <4 x float> %122, i32 1 %125 = extractelement <4 x float> %122, i32 2 %126 = fmul float %123, %24 %127 = fmul float %124, %25 %128 = fmul float %125, %26 %129 = fmul float %115, %27 %130 = fadd float %129, %126 %131 = fmul float %116, %28 %132 = fadd float %131, %127 %133 = fmul float %117, %29 %134 = fadd float %133, %128 %135 = fadd float %105, %130 %136 = fadd float %106, %132 %137 = fadd float %107, %134 %138 = bitcast float %86 to i32 %139 = bitcast float %87 to i32 %140 = insertelement <2 x i32> undef, i32 %138, i32 0 %141 = insertelement <2 x i32> %140, i32 %139, i32 1 %142 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %141, <32 x i8> %50, <16 x i8> %52, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = extractelement <4 x float> %142, i32 1 %145 = extractelement <4 x float> %142, i32 2 %146 = fadd float %143, -5.000000e-01 %147 = fadd float %144, -5.000000e-01 %148 = fadd float %145, -5.000000e-01 %149 = fmul float %146, %146 %150 = fmul float %147, %147 %151 = fadd float %150, %149 %152 = fmul float %148, %148 %153 = fadd float %151, %152 %154 = call float @llvm.AMDGPU.rsq.clamped.f32(float %153) %155 = fmul float %146, %154 %156 = fmul float %147, %154 %157 = fmul float %148, %154 %158 = fmul float %83, %155 %159 = fsub float -0.000000e+00, %158 %160 = fmul float %84, %156 %161 = fsub float %159, %160 %162 = fmul float %85, %157 %163 = fsub float %161, %162 %164 = fmul float %163, %155 %165 = fmul float %163, %156 %166 = fmul float %163, %157 %167 = fmul float %164, 2.000000e+00 %168 = fmul float %165, 2.000000e+00 %169 = fmul float %166, 2.000000e+00 %170 = fsub float -0.000000e+00, %167 %171 = fsub float %170, %83 %172 = fsub float -0.000000e+00, %168 %173 = fsub float %172, %84 %174 = fsub float -0.000000e+00, %169 %175 = fsub float %174, %85 %176 = fmul float %171, %91 %177 = fmul float %171, %92 %178 = fmul float %171, %93 %179 = fmul float %173, %94 %180 = fadd float %179, %176 %181 = fmul float %173, %95 %182 = fadd float %181, %177 %183 = fmul float %173, %96 %184 = fadd float %183, %178 %185 = fmul float %175, %88 %186 = fadd float %185, %180 %187 = fmul float %175, %89 %188 = fadd float %187, %182 %189 = fmul float %175, %90 %190 = fadd float %189, %184 %191 = bitcast float %86 to i32 %192 = bitcast float %87 to i32 %193 = insertelement <2 x i32> undef, i32 %191, i32 0 %194 = insertelement <2 x i32> %193, i32 %192, i32 1 %195 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %194, <32 x i8> %73, <16 x i8> %76, i32 2) %196 = extractelement <4 x float> %195, i32 0 %197 = extractelement <4 x float> %195, i32 1 %198 = extractelement <4 x float> %195, i32 2 %199 = fmul float %37, %186 %200 = fmul float %38, %186 %201 = fmul float %39, %186 %202 = fmul float %40, %188 %203 = fadd float %202, %199 %204 = fmul float %41, %188 %205 = fadd float %204, %200 %206 = fmul float %42, %188 %207 = fadd float %206, %201 %208 = fmul float %43, %190 %209 = fadd float %208, %203 %210 = fmul float %44, %190 %211 = fadd float %210, %205 %212 = fmul float %45, %190 %213 = fadd float %212, %207 %214 = insertelement <4 x float> undef, float %209, i32 0 %215 = insertelement <4 x float> %214, float %211, i32 1 %216 = insertelement <4 x float> %215, float %213, i32 2 %217 = insertelement <4 x float> %216, float 0.000000e+00, i32 3 %218 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %217) %219 = extractelement <4 x float> %218, i32 0 %220 = extractelement <4 x float> %218, i32 1 %221 = extractelement <4 x float> %218, i32 2 %222 = extractelement <4 x float> %218, i32 3 %223 = call float @fabs(float %221) %224 = fdiv float 1.000000e+00, %223 %225 = fmul float %219, %224 %226 = fadd float %225, 1.500000e+00 %227 = fmul float %220, %224 %228 = fadd float %227, 1.500000e+00 %229 = bitcast float %228 to i32 %230 = bitcast float %226 to i32 %231 = bitcast float %222 to i32 %232 = insertelement <4 x i32> undef, i32 %229, i32 0 %233 = insertelement <4 x i32> %232, i32 %230, i32 1 %234 = insertelement <4 x i32> %233, i32 %231, i32 2 %235 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %234, <32 x i8> %79, <16 x i8> %82, i32 4) %236 = extractelement <4 x float> %235, i32 0 %237 = extractelement <4 x float> %235, i32 1 %238 = extractelement <4 x float> %235, i32 2 %239 = fmul float %196, %236 %240 = fadd float %239, %135 %241 = fmul float %197, %237 %242 = fadd float %241, %136 %243 = fmul float %198, %238 %244 = fadd float %243, %137 %245 = fmul float %97, %97 %246 = fmul float %98, %98 %247 = fadd float %246, %245 %248 = fmul float %99, %99 %249 = fadd float %247, %248 %250 = call float @llvm.AMDGPU.rsq.clamped.f32(float %249) %251 = fmul float %97, %250 %252 = fmul float %98, %250 %253 = fmul float %99, %250 %254 = fmul float %155, %251 %255 = fmul float %156, %252 %256 = fadd float %255, %254 %257 = fmul float %157, %253 %258 = fadd float %256, %257 %259 = call float @llvm.maxnum.f32(float %258, float 0.000000e+00) %260 = fmul float %33, %259 %261 = fmul float %34, %259 %262 = fmul float %35, %259 %263 = fmul float %260, %46 %264 = fadd float %263, %30 %265 = fmul float %261, %47 %266 = fadd float %265, %31 %267 = fmul float %262, %48 %268 = fadd float %267, %32 %269 = fmul float %240, %264 %270 = fmul float %242, %266 %271 = fmul float %244, %268 %272 = call i32 @llvm.SI.packf16(float %269, float %270) %273 = bitcast i32 %272 to float %274 = call i32 @llvm.SI.packf16(float %271, float %109) %275 = bitcast i32 %274 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %273, float %275, float %273, float %275) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s3, s[8:11], 0x26 ; C2018926 s_buffer_load_dword s72, s[8:11], 0x28 ; C2240928 s_buffer_load_dword s73, s[8:11], 0x29 ; C2248929 s_buffer_load_dword s74, s[8:11], 0x2a ; C225092A s_buffer_load_dword s75, s[8:11], 0x2c ; C225892C s_buffer_load_dword s76, s[8:11], 0x2d ; C226092D s_buffer_load_dword s77, s[8:11], 0x2e ; C226892E s_buffer_load_dword s2, s[8:11], 0x3c ; C201093C s_buffer_load_dword s1, s[8:11], 0x3d ; C200893D s_buffer_load_dword s0, s[8:11], 0x3e ; C200093E v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 0, 3, [m0] ; C8280C00 v_interp_p2_f32 v10, [v10], v1, 0, 3, [m0] ; C8290C01 v_interp_p1_f32 v11, v0, 1, 3, [m0] ; C82C0D00 v_interp_p2_f32 v11, [v11], v1, 1, 3, [m0] ; C82D0D01 v_interp_p1_f32 v12, v0, 2, 3, [m0] ; C8300E00 v_interp_p2_f32 v12, [v12], v1, 2, 3, [m0] ; C8310E01 v_interp_p1_f32 v13, v0, 0, 4, [m0] ; C8341000 v_interp_p2_f32 v13, [v13], v1, 0, 4, [m0] ; C8351001 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v15, v0, 2, 4, [m0] ; C83C1200 v_interp_p2_f32 v15, [v15], v1, 2, 4, [m0] ; C83D1201 v_interp_p1_f32 v16, v0, 0, 5, [m0] ; C8401400 v_interp_p2_f32 v16, [v16], v1, 0, 5, [m0] ; C8411401 v_interp_p1_f32 v17, v0, 1, 5, [m0] ; C8441500 v_interp_p2_f32 v17, [v17], v1, 1, 5, [m0] ; C8451501 v_interp_p1_f32 v18, v0, 2, 5, [m0] ; C8481600 v_interp_p2_f32 v18, [v18], v1, 2, 5, [m0] ; C8491601 s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504 s_load_dwordx4 s[28:31], s[4:5], 0x8 ; C08E0508 s_load_dwordx4 s[40:43], s[4:5], 0xc ; C094050C s_load_dwordx4 s[12:15], s[4:5], 0x10 ; C0860510 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx8 s[64:71], s[6:7], 0x8 ; C0E00708 s_load_dwordx8 s[48:55], s[6:7], 0x10 ; C0D80710 s_load_dwordx8 s[56:63], s[6:7], 0x18 ; C0DC0718 s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[64:71], s[44:47] ; F0800F00 01701305 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[56:63], s[40:43] ; F0800700 014E1705 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[48:55], s[28:31] ; F0800700 00EC1A05 image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[32:39], s[24:27] ; F0800700 00C81D05 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, -0.5, v29 ; 06003AF1 v_add_f32_e32 v1, -0.5, v30 ; 06023CF1 v_add_f32_e32 v29, -0.5, v31 ; 063A3EF1 v_mul_f32_e32 v30, v0, v0 ; 103C0100 v_mad_f32 v30, v1, v1, v30 ; D282001E 047A0301 v_mad_f32 v30, v29, v29, v30 ; D282001E 047A3B1D v_rsq_clamp_f32_e32 v30, v30 ; 7E3C591E v_mul_f32_e32 v0, v30, v0 ; 1000011E v_mul_f32_e32 v1, v30, v1 ; 1002031E v_mul_f32_e32 v29, v30, v29 ; 103A3B1E v_mul_f32_e32 v30, v0, v2 ; 103C0500 v_mad_f32 v30, -v3, v1, -v30 ; D282001E A47A0303 v_mad_f32 v30, -v4, v29, v30 ; D282001E 247A3B04 v_mul_f32_e32 v31, v0, v30 ; 103E3D00SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL CONST[0..8] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MUL TEMP[1], CONST[5], IN[1].xxxx 2: MAD TEMP[1], CONST[6], IN[1].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[7], IN[1].zzzz, TEMP[1] 4: MAD TEMP[1].xy, CONST[8], IN[1].wwww, TEMP[1] 5: MOV TEMP[0].xy, TEMP[1].xyxx 6: ADD TEMP[1].xyz, CONST[4].xyzz, -IN[0].xyzz 7: DP3 TEMP[2].x, TEMP[1].xyzz, IN[2].xyzz 8: DP3 TEMP[3].x, TEMP[1].xyzz, IN[3].xyzz 9: MOV TEMP[2].y, TEMP[3].xxxx 10: DP3 TEMP[1].x, TEMP[1].xyzz, IN[4].xyzz 11: MOV TEMP[2].z, TEMP[1].xxxx 12: MOV TEMP[2].w, IMM[0].xxxx 13: MOV TEMP[1].w, IMM[0].xxxx 14: MOV TEMP[1].xyz, IN[2].xyzx 15: MOV TEMP[3].w, IMM[0].xxxx 16: MOV TEMP[3].xyz, IN[3].xyzx 17: MOV TEMP[4].w, IMM[0].xxxx 18: MOV TEMP[4].xyz, IN[4].xyzx 19: MUL TEMP[5], CONST[0], IN[0].xxxx 20: MAD TEMP[5], CONST[1], IN[0].yyyy, TEMP[5] 21: MAD TEMP[5], CONST[2], IN[0].zzzz, TEMP[5] 22: MAD TEMP[5], CONST[3], IN[0].wwww, TEMP[5] 23: MOV OUT[3], TEMP[4] 24: MOV OUT[4], TEMP[1] 25: MOV OUT[0], TEMP[5] 26: MOV OUT[5], TEMP[3] 27: MOV OUT[1], TEMP[2] 28: MOV OUT[2], TEMP[0] 29: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = extractelement <4 x float> %51, i32 3 %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = add i32 %5, %7 %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %58) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = add i32 %5, %7 %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %65) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = add i32 %5, %7 %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %72) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = fmul float %32, %52 %78 = fmul float %33, %52 %79 = fmul float %34, %53 %80 = fadd float %79, %77 %81 = fmul float %35, %53 %82 = fadd float %81, %78 %83 = fmul float %36, %54 %84 = fadd float %83, %80 %85 = fmul float %37, %54 %86 = fadd float %85, %82 %87 = fmul float %38, %55 %88 = fadd float %87, %84 %89 = fmul float %39, %55 %90 = fadd float %89, %86 %91 = fsub float %29, %44 %92 = fsub float %30, %45 %93 = fsub float %31, %46 %94 = fmul float %91, %60 %95 = fmul float %92, %61 %96 = fadd float %95, %94 %97 = fmul float %93, %62 %98 = fadd float %96, %97 %99 = fmul float %91, %67 %100 = fmul float %92, %68 %101 = fadd float %100, %99 %102 = fmul float %93, %69 %103 = fadd float %101, %102 %104 = fmul float %91, %74 %105 = fmul float %92, %75 %106 = fadd float %105, %104 %107 = fmul float %93, %76 %108 = fadd float %106, %107 %109 = fmul float %13, %44 %110 = fmul float %14, %44 %111 = fmul float %15, %44 %112 = fmul float %16, %44 %113 = fmul float %17, %45 %114 = fadd float %113, %109 %115 = fmul float %18, %45 %116 = fadd float %115, %110 %117 = fmul float %19, %45 %118 = fadd float %117, %111 %119 = fmul float %20, %45 %120 = fadd float %119, %112 %121 = fmul float %21, %46 %122 = fadd float %121, %114 %123 = fmul float %22, %46 %124 = fadd float %123, %116 %125 = fmul float %23, %46 %126 = fadd float %125, %118 %127 = fmul float %24, %46 %128 = fadd float %127, %120 %129 = fmul float %25, %47 %130 = fadd float %129, %122 %131 = fmul float %26, %47 %132 = fadd float %131, %124 %133 = fmul float %27, %47 %134 = fadd float %133, %126 %135 = fmul float %28, %47 %136 = fadd float %135, %128 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %98, float %103, float %108, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %88, float %90, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %74, float %75, float %76, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %60, float %61, float %62, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %67, float %68, float %69, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %130, float %132, float %134, float %136) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[0:3], 0x15 ; C20C0115 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 buffer_load_format_xyzw v[13:16], v0, s[20:23], 0 idxen ; E00C2000 80050D00 buffer_load_format_xyzw v[17:20], v0, s[8:11], 0 idxen ; E00C2000 80021100 s_buffer_load_dword s5, s[0:3], 0x19 ; C2028119 s_buffer_load_dword s6, s[0:3], 0x1c ; C203011C s_buffer_load_dword s7, s[0:3], 0x1d ; C203811D s_buffer_load_dword s8, s[0:3], 0x20 ; C2040120 s_buffer_load_dword s9, s[0:3], 0x21 ; C2048121 s_buffer_load_dword s10, s[0:3], 0x10 ; C2050110 s_buffer_load_dword s11, s[0:3], 0x11 ; C2058111 s_buffer_load_dword s12, s[0:3], 0x12 ; C2060112 s_buffer_load_dword s13, s[0:3], 0x14 ; C2068114 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e32 v0, s10, v1 ; 0800020A v_mul_f32_e32 v21, s13, v5 ; 102A0A0D v_mad_f32 v21, s4, v6, v21 ; D2820015 04560C04 v_mul_f32_e32 v22, s24, v5 ; 102C0A18 v_mad_f32 v22, s5, v6, v22 ; D2820016 045A0C05 v_mad_f32 v21, s6, v7, v21 ; D2820015 04560E06 v_mad_f32 v22, s7, v7, v22 ; D2820016 045A0E07 v_mad_f32 v21, s8, v8, v21 ; D2820015 04561008 v_mad_f32 v5, s9, v8, v22 ; D2820005 045A1009 v_sub_f32_e32 v6, s11, v2 ; 080C040B v_mul_f32_e32 v7, v9, v0 ; 100E0109 v_mul_f32_e32 v8, v13, v0 ; 1010010D v_mul_f32_e32 v0, v17, v0 ; 10000111 v_mad_f32 v7, v6, v10, v7 ; D2820007 041E1506 v_mad_f32 v8, v6, v14, v8 ; D2820008 04221D06 v_mad_f32 v0, v6, v18, v0 ; D2820000 04022506 v_sub_f32_e32 v6, s12, v3 ; 080C060C v_mad_f32 v7, v6, v11, v7 ; D2820007 041E1706 v_mad_f32 v8, v6, v15, v8 ; D2820008 04221F06 v_mad_f32 v0, v6, v19, v0 ; D2820000 04022706 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 32, 0, 0, 0, v7, v8, v0, v6 ; F800020F 06000807 exp 15, 33, 0, 0, 0, v21, v5, v6, v6 ; F800021F 06060515 exp 15, 34, 0, 0, 0, v17, v18, v19, v6 ; F800022F 06131211 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F exp 15, 35, 0, 0, 0, v9, v10, v11, v6 ; F800023F 060B0A09 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s8, s[0:3], 0x3 ; C2040103 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105 s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106 s_buffer_load_dword s12, s[0:3], 0x7 ; C2060107 s_buffer_load_dword s13, s[0:3], 0x8 ; C2068108 s_buffer_load_dword s14, s[0:3], 0x9 ; C2070109 s_buffer_load_dword s15, s[0:3], 0xa ; C207810A s_buffer_load_dword s16, s[0:3], 0xb ; C208010B s_buffer_load_dword s17, s[0:3], 0xc ; C208810C s_buffer_load_dword s18, s[0:3], 0xd ; C209010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s5, v1 ; 10000205 v_mad_f32 v0, s9, v2, v0 ; D2820000 04020409 v_mul_f32_e32 v5, s6, v1 ; 100A0206 v_mad_f32 v5, s10, v2, v5 ; D2820005 0416040A v_mul_f32_e32 v7, s7, v1 ; 100E0207 v_mad_f32 v7, s11, v2, v7 ; D2820007 041E040B v_mul_f32_e32 v8, s8, v1 ; 10100208 v_mad_f32 v8, s12, v2, v8 ; D2820008 0422040C v_mad_f32 v0, s13, v3, v0 ; D2820000 0402060D v_mad_f32 v5, s14, v3, v5 ; D2820005 0416060E v_mad_f32 v7, s15, v3, v7 ; D2820007 041E060F exp 15, 36, 0, 0, 0, v13, v14, v15, v6 ; F800024F 060F0E0D s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v6, s16, v3, v8 ; D2820006 04220610 v_mad_f32 v0, s17, v4, v0 ; D2820000 04020811 v_mad_f32 v5, s18, v4, v5 ; D2820005 04160812 v_mad_f32 v7, s0, v4, v7 ; D2820007 041E0800 v_mad_f32 v1, s4, v4, v6 ; D2820001 041A0804 exp 15, 12, 0, 1, 0, v0, v5, v7, v1 ; F80008CF 01070500 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 24 Code Size: 488 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[2..7] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { -0.5000, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[1].x, TEMP[0].wwww, CONST[3].xxxx 3: MOV TEMP[1].w, TEMP[1].xxxx 4: MOV TEMP[2].xy, IN[1].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D 6: ADD TEMP[2].xyz, TEMP[2].xyzz, IMM[0].xxxx 7: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 8: RSQ TEMP[3].x, TEMP[3].xxxx 9: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 10: MOV TEMP[3].xyz, -IN[0].xyzx 11: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[3].xyzz 12: MUL TEMP[2].xyz, TEMP[4].xxxx, TEMP[2].xyzz 13: MUL TEMP[2].xyz, IMM[0].yyyy, TEMP[2].xyzz 14: ADD TEMP[2].xyz, TEMP[3].xyzz, -TEMP[2].xyzz 15: MUL TEMP[3].xyz, TEMP[2].xxxx, IN[3].xyzz 16: MAD TEMP[3].xyz, TEMP[2].yyyy, IN[4].xyzz, TEMP[3].xyzz 17: MAD TEMP[2].xyz, TEMP[2].zzzz, IN[2].xyzz, TEMP[3].xyzz 18: MOV TEMP[3].xy, IN[1].xyyy 19: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D 20: MUL TEMP[4], CONST[4], TEMP[2].xxxx 21: MAD TEMP[4], CONST[5], TEMP[2].yyyy, TEMP[4] 22: MAD TEMP[2].xyz, CONST[6], TEMP[2].zzzz, TEMP[4] 23: MOV TEMP[2].xyz, TEMP[2].xyzz 24: TEX TEMP[2].xyz, TEMP[2], SAMP[3], CUBE 25: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[2].xyzz, TEMP[0].xyzz 26: MUL TEMP[1].xyz, TEMP[0].xyzz, CONST[2].xyzz 27: MOV OUT[0], TEMP[1] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %37 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, align 32, !tbaa !0 %39 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %42 = bitcast <8 x i32> addrspace(2)* %41 to <32 x i8> addrspace(2)* %43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %45 = bitcast <4 x i32> addrspace(2)* %44 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %48 = bitcast <8 x i32> addrspace(2)* %47 to <32 x i8> addrspace(2)* %49 = load <32 x i8>, <32 x i8> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %51 = bitcast <4 x i32> addrspace(2)* %50 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %73 = bitcast float %62 to i32 %74 = bitcast float %63 to i32 %75 = insertelement <2 x i32> undef, i32 %73, i32 0 %76 = insertelement <2 x i32> %75, i32 %74, i32 1 %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %76, <32 x i8> %43, <16 x i8> %46, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = fmul float %81, %27 %83 = bitcast float %62 to i32 %84 = bitcast float %63 to i32 %85 = insertelement <2 x i32> undef, i32 %83, i32 0 %86 = insertelement <2 x i32> %85, i32 %84, i32 1 %87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %86, <32 x i8> %38, <16 x i8> %40, i32 2) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = extractelement <4 x float> %87, i32 2 %91 = fadd float %88, -5.000000e-01 %92 = fadd float %89, -5.000000e-01 %93 = fadd float %90, -5.000000e-01 %94 = fmul float %91, %91 %95 = fmul float %92, %92 %96 = fadd float %95, %94 %97 = fmul float %93, %93 %98 = fadd float %96, %97 %99 = call float @llvm.AMDGPU.rsq.clamped.f32(float %98) %100 = fmul float %91, %99 %101 = fmul float %92, %99 %102 = fmul float %93, %99 %103 = fmul float %59, %100 %104 = fsub float -0.000000e+00, %103 %105 = fmul float %60, %101 %106 = fsub float %104, %105 %107 = fmul float %61, %102 %108 = fsub float %106, %107 %109 = fmul float %108, %100 %110 = fmul float %108, %101 %111 = fmul float %108, %102 %112 = fmul float %109, 2.000000e+00 %113 = fmul float %110, 2.000000e+00 %114 = fmul float %111, 2.000000e+00 %115 = fsub float -0.000000e+00, %112 %116 = fsub float %115, %59 %117 = fsub float -0.000000e+00, %113 %118 = fsub float %117, %60 %119 = fsub float -0.000000e+00, %114 %120 = fsub float %119, %61 %121 = fmul float %116, %67 %122 = fmul float %116, %68 %123 = fmul float %116, %69 %124 = fmul float %118, %70 %125 = fadd float %124, %121 %126 = fmul float %118, %71 %127 = fadd float %126, %122 %128 = fmul float %118, %72 %129 = fadd float %128, %123 %130 = fmul float %120, %64 %131 = fadd float %130, %125 %132 = fmul float %120, %65 %133 = fadd float %132, %127 %134 = fmul float %120, %66 %135 = fadd float %134, %129 %136 = bitcast float %62 to i32 %137 = bitcast float %63 to i32 %138 = insertelement <2 x i32> undef, i32 %136, i32 0 %139 = insertelement <2 x i32> %138, i32 %137, i32 1 %140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %139, <32 x i8> %49, <16 x i8> %52, i32 2) %141 = extractelement <4 x float> %140, i32 0 %142 = extractelement <4 x float> %140, i32 1 %143 = extractelement <4 x float> %140, i32 2 %144 = fmul float %28, %131 %145 = fmul float %29, %131 %146 = fmul float %30, %131 %147 = fmul float %31, %133 %148 = fadd float %147, %144 %149 = fmul float %32, %133 %150 = fadd float %149, %145 %151 = fmul float %33, %133 %152 = fadd float %151, %146 %153 = fmul float %34, %135 %154 = fadd float %153, %148 %155 = fmul float %35, %135 %156 = fadd float %155, %150 %157 = fmul float %36, %135 %158 = fadd float %157, %152 %159 = insertelement <4 x float> undef, float %154, i32 0 %160 = insertelement <4 x float> %159, float %156, i32 1 %161 = insertelement <4 x float> %160, float %158, i32 2 %162 = insertelement <4 x float> %161, float 0.000000e+00, i32 3 %163 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %162) %164 = extractelement <4 x float> %163, i32 0 %165 = extractelement <4 x float> %163, i32 1 %166 = extractelement <4 x float> %163, i32 2 %167 = extractelement <4 x float> %163, i32 3 %168 = call float @fabs(float %166) %169 = fdiv float 1.000000e+00, %168 %170 = fmul float %164, %169 %171 = fadd float %170, 1.500000e+00 %172 = fmul float %165, %169 %173 = fadd float %172, 1.500000e+00 %174 = bitcast float %173 to i32 %175 = bitcast float %171 to i32 %176 = bitcast float %167 to i32 %177 = insertelement <4 x i32> undef, i32 %174, i32 0 %178 = insertelement <4 x i32> %177, i32 %175, i32 1 %179 = insertelement <4 x i32> %178, i32 %176, i32 2 %180 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %179, <32 x i8> %55, <16 x i8> %58, i32 4) %181 = extractelement <4 x float> %180, i32 0 %182 = extractelement <4 x float> %180, i32 1 %183 = extractelement <4 x float> %180, i32 2 %184 = fmul float %141, %181 %185 = fadd float %184, %78 %186 = fmul float %142, %182 %187 = fadd float %186, %79 %188 = fmul float %143, %183 %189 = fadd float %188, %80 %190 = fmul float %185, %24 %191 = fmul float %187, %25 %192 = fmul float %189, %26 %193 = call i32 @llvm.SI.packf16(float %190, float %191) %194 = bitcast i32 %193 to float %195 = call i32 @llvm.SI.packf16(float %192, float %82) %196 = bitcast i32 %195 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %194, float %196, float %194, float %196) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s56, s[0:3], 0x11 ; C21C0111 s_buffer_load_dword s57, s[0:3], 0x12 ; C21C8112 s_buffer_load_dword s58, s[0:3], 0x14 ; C21D0114 s_buffer_load_dword s59, s[0:3], 0x15 ; C21D8115 s_buffer_load_dword s60, s[0:3], 0x16 ; C21E0116 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 0, 3, [m0] ; C8280C00 v_interp_p2_f32 v10, [v10], v1, 0, 3, [m0] ; C8290C01 v_interp_p1_f32 v11, v0, 1, 3, [m0] ; C82C0D00 v_interp_p2_f32 v11, [v11], v1, 1, 3, [m0] ; C82D0D01 v_interp_p1_f32 v12, v0, 2, 3, [m0] ; C8300E00 v_interp_p2_f32 v12, [v12], v1, 2, 3, [m0] ; C8310E01 v_interp_p1_f32 v13, v0, 0, 4, [m0] ; C8341000 v_interp_p2_f32 v13, [v13], v1, 0, 4, [m0] ; C8351001 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v15, v0, 2, 4, [m0] ; C83C1200 v_interp_p2_f32 v15, [v15], v1, 2, 4, [m0] ; C83D1201 s_buffer_load_dword s61, s[0:3], 0x10 ; C21E8110 s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx8 s[48:55], s[6:7], 0x8 ; C0D80708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[48:55], s[44:47] ; F0800F00 016C1005 image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[28:35], s[20:23] ; F0800700 00A71405 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[36:43], s[24:27] ; F0800700 00C91705 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v0, -0.5, v20 ; 060028F1 v_add_f32_e32 v1, -0.5, v21 ; 06022AF1 v_add_f32_e32 v5, -0.5, v22 ; 060A2CF1 v_mul_f32_e32 v6, v0, v0 ; 100C0100 v_mad_f32 v6, v1, v1, v6 ; D2820006 041A0301 v_mad_f32 v6, v5, v5, v6 ; D2820006 041A0B05 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v6, v0, v2 ; 100C0500 v_mad_f32 v6, -v3, v1, -v6 ; D2820006 A41A0303 v_mad_f32 v6, -v4, v5, v6 ; D2820006 241A0B04 v_mul_f32_e32 v20, v0, v6 ; 10280D00 v_mad_f32 v0, v6, v0, v20 ; D2820000 04520106 v_mul_f32_e32 v20, v1, v6 ; 10280D01 v_mad_f32 v1, v6, v1, v20 ; D2820001 04520306 v_mul_f32_e32 v20, v5, v6 ; 10280D05 v_mad_f32 v5, v6, v5, v20 ; D2820005 04520B06 v_sub_f32_e64 v0, -v0, v2 ; D2080000 20020500 v_sub_f32_e64 v1, -v1, v3 ; D2080001 20020701 v_sub_f32_e64 v2, -v5, v4 ; D2080002 20020905 v_mul_f32_e32 v3, v10, v0 ; 1006010A v_mad_f32 v3, v1, v13, v3 ; D2820003 040E1B01 v_mul_f32_e32 v4, v11, v0 ; 1008010B v_mad_f32 v4, v1, v14, v4 ; D2820004 04121D01 v_mad_f32 v3, v2, v7, v3 ; D2820003 040E0F02 v_mul_f32_e32 v5, s61, v3 ; 100A063D v_mul_f32_e32 v6, s56, v3 ; 100C0638 v_mul_f32_e32 v3, s57, v3 ; 10060639 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_buffer_load_dword s5, s[0:3], 0x19 ; C2028119 s_buffer_load_dword s6, s[0:3], 0x1a ; C203011A v_mad_f32 v4, v2, v8, v4 ; D2820004 04121102 v_mad_f32 v5, s58, v4, v5 ; D2820005 0416083A v_mad_f32 v6, s59, v4, v6 ; D2820006 041A083B v_mad_f32 v3, s60, v4, v3 ; D2820003 040E083C v_mul_f32_e32 v0, v12, v0 ; 1000010C v_mad_f32 v0, v1, v15, v0 ; D2820000 04021F01 v_mad_f32 v0, v2, v9, v0 ; D2820000 04021302 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v7, s4, v0, v5 ; D2820007 04160004 v_mad_f32 v8, s5, v0, v6 ; D2820008 041A0005 v_mad_f32 v9, s6, v0, v3 ; D2820009 040E0006 v_mov_b32_e32 v10, 0 ; 7E140280 v_cubeid_f32 v29, v7, v8, v9 ; D288001D 04261107 v_cubema_f32 v28, v7, v8, v9 ; D28E001C 04261107 v_cubesc_f32 v27, v7, v8, v9 ; D28A001B 04261107 v_cubetc_f32 v26, v7, v8, v9 ; D28C001A 04261107 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C v_rcp_f32_e64 v0, |v28| ; D3540100 0000011C s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s0, s[0:3], 0xa ; C200010A v_mov_b32_e32 v1, 0x3fc00000 ; 7E0202FF 3FC00000 v_mad_f32 v28, v26, v0, v1 ; D282001C 0406011A v_mad_f32 v27, v27, v0, v1 ; D282001B 0406011B image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[12:19], s[8:11] ; F0800700 0043001B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v3, v23, v0, v16 ; D2820003 04420117 v_mad_f32 v4, v24, v1, v17 ; D2820004 04460318 v_mad_f32 v0, v25, v2, v18 ; D2820000 044A0519 v_mul_f32_e32 v1, s4, v19 ; 10022604 v_mul_f32_e32 v2, s5, v3 ; 10040605 v_mul_f32_e32 v3, s6, v4 ; 10060806 v_cvt_pkrtz_f16_f32_e32 v2, v2, v3 ; 5E040702 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 36 Code Size: 636 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[4], IN[1].xxxx 5: MAD TEMP[1], CONST[5], IN[1].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[6], IN[1].zzzz, TEMP[1] 7: MAD TEMP[1].xy, CONST[7], IN[1].wwww, TEMP[1] 8: MOV TEMP[1].xy, TEMP[1].xyxx 9: MOV OUT[1], TEMP[0] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[2], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fmul float %13, %47 %60 = fmul float %14, %47 %61 = fmul float %15, %47 %62 = fmul float %16, %47 %63 = fmul float %17, %48 %64 = fadd float %63, %59 %65 = fmul float %18, %48 %66 = fadd float %65, %60 %67 = fmul float %19, %48 %68 = fadd float %67, %61 %69 = fmul float %20, %48 %70 = fadd float %69, %62 %71 = fmul float %21, %49 %72 = fadd float %71, %64 %73 = fmul float %22, %49 %74 = fadd float %73, %66 %75 = fmul float %23, %49 %76 = fadd float %75, %68 %77 = fmul float %24, %49 %78 = fadd float %77, %70 %79 = fmul float %25, %50 %80 = fadd float %79, %72 %81 = fmul float %26, %50 %82 = fadd float %81, %74 %83 = fmul float %27, %50 %84 = fadd float %83, %76 %85 = fmul float %28, %50 %86 = fadd float %85, %78 %87 = fmul float %29, %55 %88 = fmul float %30, %55 %89 = fmul float %31, %55 %90 = fmul float %32, %55 %91 = fmul float %33, %56 %92 = fadd float %91, %87 %93 = fmul float %34, %56 %94 = fadd float %93, %88 %95 = fmul float %35, %56 %96 = fadd float %95, %89 %97 = fmul float %36, %56 %98 = fadd float %97, %90 %99 = fmul float %37, %57 %100 = fadd float %99, %92 %101 = fmul float %38, %57 %102 = fadd float %101, %94 %103 = fmul float %39, %57 %104 = fadd float %103, %96 %105 = fmul float %40, %57 %106 = fadd float %105, %98 %107 = fmul float %41, %58 %108 = fadd float %107, %100 %109 = fmul float %42, %58 %110 = fadd float %109, %102 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %80, float %82, float %84, float %86) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %108, float %110, float %104, float %106) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %80, float %82, float %84, float %86) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 v_mad_f32 v0, s5, v2, v0 ; D2820000 04020405 v_mul_f32_e32 v9, s6, v1 ; 10120206 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 v_mad_f32 v9, s7, v2, v9 ; D2820009 04260407 v_mul_f32_e32 v10, s8, v1 ; 10140208 v_mad_f32 v10, s9, v2, v10 ; D282000A 042A0409 s_buffer_load_dword s7, s[0:3], 0xa ; C203810A s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s9, s[0:3], 0xc ; C204810C s_buffer_load_dword s11, s[0:3], 0xd ; C205810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E v_mul_f32_e32 v11, s10, v1 ; 1016020A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, s4, v2, v11 ; D282000B 042E0404 v_mad_f32 v0, s5, v3, v0 ; D2820000 04020605 v_mad_f32 v9, s6, v3, v9 ; D2820009 04260606 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F v_mad_f32 v10, s7, v3, v10 ; D282000A 042A0607 v_mad_f32 v11, s8, v3, v11 ; D282000B 042E0608 v_mad_f32 v0, s9, v4, v0 ; D2820000 04020809 v_mad_f32 v9, s11, v4, v9 ; D2820009 0426080B v_mad_f32 v10, s12, v4, v10 ; D282000A 042A080C s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111 s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s4, v4, v11 ; D2820001 042E0804 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115 s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116 s_buffer_load_dword s11, s[0:3], 0x17 ; C2058117 s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 s_buffer_load_dword s14, s[0:3], 0x1a ; C207011A s_buffer_load_dword s15, s[0:3], 0x1b ; C207811B s_buffer_load_dword s16, s[0:3], 0x1c ; C208011C s_buffer_load_dword s0, s[0:3], 0x1d ; C200011D v_mul_f32_e32 v2, s5, v5 ; 10040A05 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s4, v6, v2 ; D2820002 040A0C04 v_mul_f32_e32 v3, s6, v5 ; 10060A06 v_mad_f32 v3, s9, v6, v3 ; D2820003 040E0C09 v_mul_f32_e32 v4, s7, v5 ; 10080A07 v_mad_f32 v4, s10, v6, v4 ; D2820004 04120C0A v_mul_f32_e32 v11, s8, v5 ; 10160A08 v_mad_f32 v11, s11, v6, v11 ; D282000B 042E0C0B v_mad_f32 v2, s12, v7, v2 ; D2820002 040A0E0C v_mad_f32 v3, s13, v7, v3 ; D2820003 040E0E0D v_mad_f32 v4, s14, v7, v4 ; D2820004 04120E0E v_mad_f32 v11, s15, v7, v11 ; D282000B 042E0E0F v_mad_f32 v2, s16, v8, v2 ; D2820002 040A1010 v_mad_f32 v3, s0, v8, v3 ; D2820003 040E1000 exp 15, 32, 0, 0, 0, v0, v9, v10, v1 ; F800020F 010A0900 exp 15, 33, 0, 0, 0, v2, v3, v4, v11 ; F800021F 0B040302 exp 15, 12, 0, 1, 0, v0, v9, v10, v1 ; F80008CF 010A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 412 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[2..5] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { -0.5000, 0.0100, -0.0100, 1.0000} IMM[1] FLT32 { 20.0000, 0.0000, 0.0000, 0.0000} 0: RCP TEMP[0].x, IN[0].wwww 1: MUL TEMP[0].xy, CONST[3].xyyy, TEMP[0].xxxx 2: MAD TEMP[0].xy, IN[0].xyyy, TEMP[0].xyyy, CONST[4].xyyy 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D 5: ADD TEMP[1].xyz, TEMP[1].xyzz, IMM[0].xxxx 6: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 7: RSQ TEMP[2].x, TEMP[2].xxxx 8: MUL TEMP[1].xy, TEMP[1].xyzz, TEMP[2].xxxx 9: MAD TEMP[1].xy, TEMP[1].xyyy, CONST[2].xyyy, TEMP[0].xyyy 10: ADD TEMP[2].xy, TEMP[1].xyyy, IMM[0].yyyy 11: MOV TEMP[2].xy, TEMP[2].xyyy 12: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 13: ADD TEMP[3].xy, TEMP[1].xyyy, IMM[0].yzzz 14: MOV TEMP[3].xy, TEMP[3].xyyy 15: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D 16: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 17: SQRT TEMP[2].x, TEMP[2].xxxx 18: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx 19: MIN TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx 20: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz 21: SQRT TEMP[3].x, TEMP[3].xxxx 22: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 23: MIN TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx 24: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 25: ADD TEMP[3].xy, TEMP[1].xyyy, IMM[0].zyyy 26: MOV TEMP[3].xy, TEMP[3].xyyy 27: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D 28: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz 29: SQRT TEMP[3].x, TEMP[3].xxxx 30: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 31: MIN TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx 32: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 33: ADD TEMP[3].xy, TEMP[1].xyyy, IMM[0].zzzz 34: MOV TEMP[3].xy, TEMP[3].xyyy 35: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D 36: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz 37: SQRT TEMP[3].x, TEMP[3].xxxx 38: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 39: MIN TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx 40: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 41: MOV TEMP[3].w, IMM[0].wwww 42: LRP TEMP[0].xy, TEMP[2].xxxx, TEMP[1].xyyy, TEMP[0].xyyy 43: MOV TEMP[0].xy, TEMP[0].xyyy 44: TEX TEMP[0].xyz, TEMP[0], SAMP[1], 2D 45: MOV TEMP[3].xyz, TEMP[0].xyzx 46: MUL TEMP[0], TEMP[3], CONST[5] 47: MOV OUT[0], TEMP[0] 48: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %34 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %35 = load <32 x i8>, <32 x i8> addrspace(2)* %34, align 32, !tbaa !0 %36 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 %40 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 %42 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %47 = fdiv float 1.000000e+00, %44 %48 = fmul float %26, %47 %49 = fmul float %27, %47 %50 = fmul float %42, %48 %51 = fadd float %50, %28 %52 = fmul float %43, %49 %53 = fadd float %52, %29 %54 = bitcast float %45 to i32 %55 = bitcast float %46 to i32 %56 = insertelement <2 x i32> undef, i32 %54, i32 0 %57 = insertelement <2 x i32> %56, i32 %55, i32 1 %58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %57, <32 x i8> %35, <16 x i8> %37, i32 2) %59 = extractelement <4 x float> %58, i32 0 %60 = extractelement <4 x float> %58, i32 1 %61 = extractelement <4 x float> %58, i32 2 %62 = fadd float %59, -5.000000e-01 %63 = fadd float %60, -5.000000e-01 %64 = fadd float %61, -5.000000e-01 %65 = fmul float %62, %62 %66 = fmul float %63, %63 %67 = fadd float %66, %65 %68 = fmul float %64, %64 %69 = fadd float %67, %68 %70 = call float @llvm.AMDGPU.rsq.clamped.f32(float %69) %71 = fmul float %62, %70 %72 = fmul float %63, %70 %73 = fmul float %71, %24 %74 = fadd float %73, %51 %75 = fmul float %72, %25 %76 = fadd float %75, %53 %77 = fadd float %74, 0x3F847AE140000000 %78 = fadd float %76, 0x3F847AE140000000 %79 = bitcast float %77 to i32 %80 = bitcast float %78 to i32 %81 = insertelement <2 x i32> undef, i32 %79, i32 0 %82 = insertelement <2 x i32> %81, i32 %80, i32 1 %83 = bitcast <8 x i32> %39 to <32 x i8> %84 = bitcast <4 x i32> %41 to <16 x i8> %85 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %82, <32 x i8> %83, <16 x i8> %84, i32 2) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = fadd float %74, 0x3F847AE140000000 %90 = fadd float %76, 0xBF847AE140000000 %91 = bitcast float %89 to i32 %92 = bitcast float %90 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %39 to <32 x i8> %96 = bitcast <4 x i32> %41 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = fmul float %86, %86 %102 = fmul float %87, %87 %103 = fadd float %102, %101 %104 = fmul float %88, %88 %105 = fadd float %103, %104 %106 = call float @llvm.sqrt.f32(float %105) %107 = fmul float %106, 2.000000e+01 %108 = call float @llvm.minnum.f32(float %107, float 1.000000e+00) %109 = fmul float %98, %98 %110 = fmul float %99, %99 %111 = fadd float %110, %109 %112 = fmul float %100, %100 %113 = fadd float %111, %112 %114 = call float @llvm.sqrt.f32(float %113) %115 = fmul float %114, 2.000000e+01 %116 = call float @llvm.minnum.f32(float %115, float 1.000000e+00) %117 = fmul float %108, %116 %118 = fadd float %74, 0xBF847AE140000000 %119 = fadd float %76, 0x3F847AE140000000 %120 = bitcast float %118 to i32 %121 = bitcast float %119 to i32 %122 = insertelement <2 x i32> undef, i32 %120, i32 0 %123 = insertelement <2 x i32> %122, i32 %121, i32 1 %124 = bitcast <8 x i32> %39 to <32 x i8> %125 = bitcast <4 x i32> %41 to <16 x i8> %126 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %123, <32 x i8> %124, <16 x i8> %125, i32 2) %127 = extractelement <4 x float> %126, i32 0 %128 = extractelement <4 x float> %126, i32 1 %129 = extractelement <4 x float> %126, i32 2 %130 = fmul float %127, %127 %131 = fmul float %128, %128 %132 = fadd float %131, %130 %133 = fmul float %129, %129 %134 = fadd float %132, %133 %135 = call float @llvm.sqrt.f32(float %134) %136 = fmul float %135, 2.000000e+01 %137 = call float @llvm.minnum.f32(float %136, float 1.000000e+00) %138 = fmul float %117, %137 %139 = fadd float %74, 0xBF847AE140000000 %140 = fadd float %76, 0xBF847AE140000000 %141 = bitcast float %139 to i32 %142 = bitcast float %140 to i32 %143 = insertelement <2 x i32> undef, i32 %141, i32 0 %144 = insertelement <2 x i32> %143, i32 %142, i32 1 %145 = bitcast <8 x i32> %39 to <32 x i8> %146 = bitcast <4 x i32> %41 to <16 x i8> %147 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %144, <32 x i8> %145, <16 x i8> %146, i32 2) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 1 %150 = extractelement <4 x float> %147, i32 2 %151 = fmul float %148, %148 %152 = fmul float %149, %149 %153 = fadd float %152, %151 %154 = fmul float %150, %150 %155 = fadd float %153, %154 %156 = call float @llvm.sqrt.f32(float %155) %157 = fmul float %156, 2.000000e+01 %158 = call float @llvm.minnum.f32(float %157, float 1.000000e+00) %159 = fmul float %138, %158 %160 = call float @llvm.AMDGPU.lrp(float %159, float %74, float %51) %161 = call float @llvm.AMDGPU.lrp(float %159, float %76, float %53) %162 = bitcast float %160 to i32 %163 = bitcast float %161 to i32 %164 = insertelement <2 x i32> undef, i32 %162, i32 0 %165 = insertelement <2 x i32> %164, i32 %163, i32 1 %166 = bitcast <8 x i32> %39 to <32 x i8> %167 = bitcast <4 x i32> %41 to <16 x i8> %168 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %165, <32 x i8> %166, <16 x i8> %167, i32 2) %169 = extractelement <4 x float> %168, i32 0 %170 = extractelement <4 x float> %168, i32 1 %171 = extractelement <4 x float> %168, i32 2 %172 = fmul float %169, %30 %173 = fmul float %170, %31 %174 = fmul float %171, %32 %175 = call i32 @llvm.SI.packf16(float %172, float %173) %176 = bitcast i32 %175 to float %177 = call i32 @llvm.SI.packf16(float %174, float %33) %178 = bitcast i32 %177 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %176, float %178, float %176, float %178) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_rcp_f32_e32 v4, v4 ; 7E085504 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[8:11], 0x8 ; C2080908 s_buffer_load_dword s17, s[8:11], 0x9 ; C2088909 s_buffer_load_dword s4, s[8:11], 0xc ; C202090C s_buffer_load_dword s5, s[8:11], 0xd ; C202890D s_buffer_load_dword s18, s[8:11], 0x10 ; C2090910 s_buffer_load_dword s19, s[8:11], 0x11 ; C2098911 s_buffer_load_dword s20, s[8:11], 0x14 ; C20A0914 s_buffer_load_dword s21, s[8:11], 0x15 ; C20A8915 s_buffer_load_dword s22, s[8:11], 0x16 ; C20B0916 s_buffer_load_dword s23, s[8:11], 0x17 ; C20B8917 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v4 ; 10000804 v_mad_f32 v0, v2, v0, s18 ; D2820000 004A0102 v_mul_f32_e32 v1, s5, v4 ; 10020805 v_mad_f32 v1, v3, v1, s19 ; D2820001 004E0303 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[24:31], s[12:15] ; F0800700 00660205 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v5, -0.5, v2 ; 060A04F1 v_add_f32_e32 v6, -0.5, v3 ; 060C06F1 v_add_f32_e32 v2, -0.5, v4 ; 060408F1 v_mul_f32_e32 v3, v5, v5 ; 10060B05 v_mad_f32 v3, v6, v6, v3 ; D2820003 040E0D06 v_mad_f32 v2, v2, v2, v3 ; D2820002 040E0502 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mul_f32_e32 v3, v2, v5 ; 10060B02 v_mul_f32_e32 v2, v2, v6 ; 10040D02 v_mad_f32 v3, v3, s16, v0 ; D2820003 04002103 v_mad_f32 v2, v2, s17, v1 ; D2820002 04042302 v_add_f32_e32 v4, 0x3c23d70a, v3 ; 060806FF 3C23D70A v_add_f32_e32 v5, 0x3c23d70a, v2 ; 060A04FF 3C23D70A image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800700 00010604 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v6, v6 ; 10120D06 v_mad_f32 v9, v7, v7, v9 ; D2820009 04260F07 v_mov_b32_e32 v10, 0xbc23d70a ; 7E1402FF BC23D70A v_add_f32_e32 v11, v10, v2 ; 0616050A v_mov_b32_e32 v12, v4 ; 7E180304 v_mov_b32_e32 v13, v5 ; 7E1A0305 v_mad_f32 v6, v8, v8, v9 ; D2820006 04261108 v_mov_b32_e32 v13, v11 ; 7E1A030B image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[4:11], s[0:3] ; F0800700 0001070C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v7, v7 ; 10180F07 v_mad_f32 v12, v8, v8, v12 ; D282000C 04321108 v_mad_f32 v7, v9, v9, v12 ; D2820007 04321309 v_add_f32_e32 v4, v10, v3 ; 0608070A image_sample v[8:10], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800700 00010804 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v8, v8 ; 10181108 v_mad_f32 v12, v9, v9, v12 ; D282000C 04321309 v_mad_f32 v8, v10, v10, v12 ; D2820008 0432150A v_mov_b32_e32 v5, v11 ; 7E0A030B image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800700 00010904 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, v9, v9 ; 10081309 v_mad_f32 v4, v10, v10, v4 ; D2820004 0412150A v_mad_f32 v4, v11, v11, v4 ; D2820004 0412170B v_sqrt_f32_e32 v5, v6 ; 7E0A6706 v_mul_f32_e32 v5, 0x41a00000, v5 ; 100A0AFF 41A00000 v_min_f32_e32 v5, 1.0, v5 ; 1E0A0AF2 v_sqrt_f32_e32 v6, v7 ; 7E0C6707 v_mul_f32_e32 v6, 0x41a00000, v6 ; 100C0CFF 41A00000 v_min_f32_e32 v6, 1.0, v6 ; 1E0C0CF2 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_sqrt_f32_e32 v6, v8 ; 7E0C6708 v_mul_f32_e32 v6, 0x41a00000, v6 ; 100C0CFF 41A00000 v_min_f32_e32 v6, 1.0, v6 ; 1E0C0CF2 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_sqrt_f32_e32 v4, v4 ; 7E086704 v_mul_f32_e32 v4, 0x41a00000, v4 ; 100808FF 41A00000 v_min_f32_e32 v4, 1.0, v4 ; 1E0808F2 v_mul_f32_e32 v6, v4, v5 ; 100C0B04 v_mad_f32 v4, -v5, v4, 1.0 ; D2820004 23CA0905 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mad_f32 v3, v6, v3, v0 ; D2820003 04020706 v_mad_f32 v4, v6, v2, v1 ; D2820004 04060506 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800700 00010003 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, s20, v0 ; 10060014 v_mul_f32_e32 v4, s21, v1 ; 10080215 v_mul_f32_e32 v0, s22, v2 ; 10000416 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e64 v0, v0, s23 ; D25E0000 00002F00 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 552 bytes LDS: 0 blocks Scratch: 0 bytes per wave ********************