SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v0, v1 ; 5E080300 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 exp 15, 0, 1, 1, 1, v4, v0, v4, v0 ; F8001C0F 00040004 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %33, %13 %38 = fmul float %33, %14 %39 = fmul float %33, %15 %40 = fmul float %33, %16 %41 = fmul float %34, %17 %42 = fadd float %41, %37 %43 = fmul float %34, %18 %44 = fadd float %43, %38 %45 = fmul float %34, %19 %46 = fadd float %45, %39 %47 = fmul float %34, %20 %48 = fadd float %47, %40 %49 = fmul float %35, %21 %50 = fadd float %49, %42 %51 = fmul float %35, %22 %52 = fadd float %51, %44 %53 = fmul float %35, %23 %54 = fadd float %53, %46 %55 = fmul float %35, %24 %56 = fadd float %55, %48 %57 = fmul float %36, %25 %58 = fadd float %57, %50 %59 = fmul float %36, %26 %60 = fadd float %59, %52 %61 = fmul float %36, %27 %62 = fadd float %61, %54 %63 = fmul float %36, %28 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mad_f32 v4, v1, s8, v4 ; D2820004 04101101 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mad_f32 v5, v1, s9, v5 ; D2820005 04141301 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mad_f32 v6, v1, s10, v6 ; D2820006 04181501 v_mul_f32_e32 v7, s7, v0 ; 100E0007 v_mad_f32 v7, v1, s11, v7 ; D2820007 041C1701 v_mad_f32 v4, v2, s12, v4 ; D2820004 04101902 v_mad_f32 v5, v2, s13, v5 ; D2820005 04141B02 v_mad_f32 v6, v2, s14, v6 ; D2820006 04181D02 v_mad_f32 v7, v2, s15, v7 ; D2820007 041C1F02 v_mad_f32 v4, v3, s16, v4 ; D2820004 04102103 v_mad_f32 v5, v3, s17, v5 ; D2820005 04142303 v_mad_f32 v6, v3, s18, v6 ; D2820006 04182503 v_mad_f32 v0, v3, s0, v7 ; D2820000 041C0103 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 220 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL CONST[0..3] 0: MOV OUT[0], CONST[3] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..2] IMM[0] FLT32 { 0.0000, 0.1250, 0.0000, 0.0000} IMM[1] UINT32 {0, 1, 2, 3} IMM[2] UINT32 {4, 5, 6, 7} 0: MOV TEMP[0], IMM[0].xxxx 1: F2U TEMP[1], IN[0] 2: MOV TEMP[1].w, IMM[1].xxxx 3: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 4: ADD TEMP[0], TEMP[0], TEMP[2] 5: MOV TEMP[1].w, IMM[1].yyyy 6: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 7: ADD TEMP[0], TEMP[0], TEMP[2] 8: MOV TEMP[1].w, IMM[1].zzzz 9: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 10: ADD TEMP[0], TEMP[0], TEMP[2] 11: MOV TEMP[1].w, IMM[1].wwww 12: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 13: ADD TEMP[0], TEMP[0], TEMP[2] 14: MOV TEMP[1].w, IMM[2].xxxx 15: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 16: ADD TEMP[0], TEMP[0], TEMP[2] 17: MOV TEMP[1].w, IMM[2].yyyy 18: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 19: ADD TEMP[0], TEMP[0], TEMP[2] 20: MOV TEMP[1].w, IMM[2].zzzz 21: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 22: ADD TEMP[0], TEMP[0], TEMP[2] 23: MOV TEMP[1].w, IMM[2].wwww 24: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 25: ADD TEMP[0], TEMP[0], TEMP[2] 26: MUL TEMP[0], TEMP[0], IMM[0].yyyy 27: MOV OUT[0], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 17 %25 = load <8 x i32>, <8 x i32> addrspace(2)* %24, align 32, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = fptoui float %26 to i32 %29 = fptoui float %27 to i32 %30 = insertelement <4 x i32> undef, i32 %28, i32 0 %31 = insertelement <4 x i32> %30, i32 %29, i32 1 %32 = insertelement <4 x i32> %31, i32 0, i32 2 %33 = bitcast <8 x i32> %25 to <32 x i8> %34 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %32, <32 x i8> %33, i32 2) %35 = extractelement <4 x i32> %34, i32 0 %36 = and i32 %35, 15 %37 = extractelement <8 x i32> %25, i32 1 %38 = icmp ne i32 %37, 0 %39 = select i1 %38, i32 %36, i32 0 %40 = insertelement <4 x i32> undef, i32 %28, i32 0 %41 = insertelement <4 x i32> %40, i32 %29, i32 1 %42 = insertelement <4 x i32> %41, i32 %39, i32 2 %43 = bitcast <8 x i32> %23 to <32 x i8> %44 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %42, <32 x i8> %43, i32 14) %45 = extractelement <4 x i32> %44, i32 0 %46 = extractelement <4 x i32> %44, i32 1 %47 = extractelement <4 x i32> %44, i32 2 %48 = extractelement <4 x i32> %44, i32 3 %49 = bitcast i32 %45 to float %50 = bitcast i32 %46 to float %51 = bitcast i32 %47 to float %52 = bitcast i32 %48 to float %53 = fadd float %49, 0.000000e+00 %54 = fadd float %50, 0.000000e+00 %55 = fadd float %51, 0.000000e+00 %56 = fadd float %52, 0.000000e+00 %57 = insertelement <4 x i32> undef, i32 %28, i32 0 %58 = insertelement <4 x i32> %57, i32 %29, i32 1 %59 = insertelement <4 x i32> %58, i32 0, i32 2 %60 = bitcast <8 x i32> %25 to <32 x i8> %61 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %59, <32 x i8> %60, i32 2) %62 = extractelement <4 x i32> %61, i32 0 %63 = lshr i32 %62, 4 %64 = and i32 %63, 15 %65 = extractelement <8 x i32> %25, i32 1 %66 = icmp ne i32 %65, 0 %67 = select i1 %66, i32 %64, i32 1 %68 = insertelement <4 x i32> undef, i32 %28, i32 0 %69 = insertelement <4 x i32> %68, i32 %29, i32 1 %70 = insertelement <4 x i32> %69, i32 %67, i32 2 %71 = bitcast <8 x i32> %23 to <32 x i8> %72 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %70, <32 x i8> %71, i32 14) %73 = extractelement <4 x i32> %72, i32 0 %74 = extractelement <4 x i32> %72, i32 1 %75 = extractelement <4 x i32> %72, i32 2 %76 = extractelement <4 x i32> %72, i32 3 %77 = bitcast i32 %73 to float %78 = bitcast i32 %74 to float %79 = bitcast i32 %75 to float %80 = bitcast i32 %76 to float %81 = fadd float %53, %77 %82 = fadd float %54, %78 %83 = fadd float %55, %79 %84 = fadd float %56, %80 %85 = insertelement <4 x i32> undef, i32 %28, i32 0 %86 = insertelement <4 x i32> %85, i32 %29, i32 1 %87 = insertelement <4 x i32> %86, i32 0, i32 2 %88 = bitcast <8 x i32> %25 to <32 x i8> %89 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %87, <32 x i8> %88, i32 2) %90 = extractelement <4 x i32> %89, i32 0 %91 = lshr i32 %90, 8 %92 = and i32 %91, 15 %93 = extractelement <8 x i32> %25, i32 1 %94 = icmp ne i32 %93, 0 %95 = select i1 %94, i32 %92, i32 2 %96 = insertelement <4 x i32> undef, i32 %28, i32 0 %97 = insertelement <4 x i32> %96, i32 %29, i32 1 %98 = insertelement <4 x i32> %97, i32 %95, i32 2 %99 = bitcast <8 x i32> %23 to <32 x i8> %100 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %98, <32 x i8> %99, i32 14) %101 = extractelement <4 x i32> %100, i32 0 %102 = extractelement <4 x i32> %100, i32 1 %103 = extractelement <4 x i32> %100, i32 2 %104 = extractelement <4 x i32> %100, i32 3 %105 = bitcast i32 %101 to float %106 = bitcast i32 %102 to float %107 = bitcast i32 %103 to float %108 = bitcast i32 %104 to float %109 = fadd float %81, %105 %110 = fadd float %82, %106 %111 = fadd float %83, %107 %112 = fadd float %84, %108 %113 = insertelement <4 x i32> undef, i32 %28, i32 0 %114 = insertelement <4 x i32> %113, i32 %29, i32 1 %115 = insertelement <4 x i32> %114, i32 0, i32 2 %116 = bitcast <8 x i32> %25 to <32 x i8> %117 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %115, <32 x i8> %116, i32 2) %118 = extractelement <4 x i32> %117, i32 0 %119 = lshr i32 %118, 12 %120 = and i32 %119, 15 %121 = extractelement <8 x i32> %25, i32 1 %122 = icmp ne i32 %121, 0 %123 = select i1 %122, i32 %120, i32 3 %124 = insertelement <4 x i32> undef, i32 %28, i32 0 %125 = insertelement <4 x i32> %124, i32 %29, i32 1 %126 = insertelement <4 x i32> %125, i32 %123, i32 2 %127 = bitcast <8 x i32> %23 to <32 x i8> %128 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %126, <32 x i8> %127, i32 14) %129 = extractelement <4 x i32> %128, i32 0 %130 = extractelement <4 x i32> %128, i32 1 %131 = extractelement <4 x i32> %128, i32 2 %132 = extractelement <4 x i32> %128, i32 3 %133 = bitcast i32 %129 to float %134 = bitcast i32 %130 to float %135 = bitcast i32 %131 to float %136 = bitcast i32 %132 to float %137 = fadd float %109, %133 %138 = fadd float %110, %134 %139 = fadd float %111, %135 %140 = fadd float %112, %136 %141 = insertelement <4 x i32> undef, i32 %28, i32 0 %142 = insertelement <4 x i32> %141, i32 %29, i32 1 %143 = insertelement <4 x i32> %142, i32 0, i32 2 %144 = bitcast <8 x i32> %25 to <32 x i8> %145 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %143, <32 x i8> %144, i32 2) %146 = extractelement <4 x i32> %145, i32 0 %147 = lshr i32 %146, 16 %148 = and i32 %147, 15 %149 = extractelement <8 x i32> %25, i32 1 %150 = icmp ne i32 %149, 0 %151 = select i1 %150, i32 %148, i32 4 %152 = insertelement <4 x i32> undef, i32 %28, i32 0 %153 = insertelement <4 x i32> %152, i32 %29, i32 1 %154 = insertelement <4 x i32> %153, i32 %151, i32 2 %155 = bitcast <8 x i32> %23 to <32 x i8> %156 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %154, <32 x i8> %155, i32 14) %157 = extractelement <4 x i32> %156, i32 0 %158 = extractelement <4 x i32> %156, i32 1 %159 = extractelement <4 x i32> %156, i32 2 %160 = extractelement <4 x i32> %156, i32 3 %161 = bitcast i32 %157 to float %162 = bitcast i32 %158 to float %163 = bitcast i32 %159 to float %164 = bitcast i32 %160 to float %165 = fadd float %137, %161 %166 = fadd float %138, %162 %167 = fadd float %139, %163 %168 = fadd float %140, %164 %169 = insertelement <4 x i32> undef, i32 %28, i32 0 %170 = insertelement <4 x i32> %169, i32 %29, i32 1 %171 = insertelement <4 x i32> %170, i32 0, i32 2 %172 = bitcast <8 x i32> %25 to <32 x i8> %173 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %171, <32 x i8> %172, i32 2) %174 = extractelement <4 x i32> %173, i32 0 %175 = lshr i32 %174, 20 %176 = and i32 %175, 15 %177 = extractelement <8 x i32> %25, i32 1 %178 = icmp ne i32 %177, 0 %179 = select i1 %178, i32 %176, i32 5 %180 = insertelement <4 x i32> undef, i32 %28, i32 0 %181 = insertelement <4 x i32> %180, i32 %29, i32 1 %182 = insertelement <4 x i32> %181, i32 %179, i32 2 %183 = bitcast <8 x i32> %23 to <32 x i8> %184 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %182, <32 x i8> %183, i32 14) %185 = extractelement <4 x i32> %184, i32 0 %186 = extractelement <4 x i32> %184, i32 1 %187 = extractelement <4 x i32> %184, i32 2 %188 = extractelement <4 x i32> %184, i32 3 %189 = bitcast i32 %185 to float %190 = bitcast i32 %186 to float %191 = bitcast i32 %187 to float %192 = bitcast i32 %188 to float %193 = fadd float %165, %189 %194 = fadd float %166, %190 %195 = fadd float %167, %191 %196 = fadd float %168, %192 %197 = insertelement <4 x i32> undef, i32 %28, i32 0 %198 = insertelement <4 x i32> %197, i32 %29, i32 1 %199 = insertelement <4 x i32> %198, i32 0, i32 2 %200 = bitcast <8 x i32> %25 to <32 x i8> %201 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %199, <32 x i8> %200, i32 2) %202 = extractelement <4 x i32> %201, i32 0 %203 = lshr i32 %202, 24 %204 = and i32 %203, 15 %205 = extractelement <8 x i32> %25, i32 1 %206 = icmp ne i32 %205, 0 %207 = select i1 %206, i32 %204, i32 6 %208 = insertelement <4 x i32> undef, i32 %28, i32 0 %209 = insertelement <4 x i32> %208, i32 %29, i32 1 %210 = insertelement <4 x i32> %209, i32 %207, i32 2 %211 = bitcast <8 x i32> %23 to <32 x i8> %212 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %210, <32 x i8> %211, i32 14) %213 = extractelement <4 x i32> %212, i32 0 %214 = extractelement <4 x i32> %212, i32 1 %215 = extractelement <4 x i32> %212, i32 2 %216 = extractelement <4 x i32> %212, i32 3 %217 = bitcast i32 %213 to float %218 = bitcast i32 %214 to float %219 = bitcast i32 %215 to float %220 = bitcast i32 %216 to float %221 = fadd float %193, %217 %222 = fadd float %194, %218 %223 = fadd float %195, %219 %224 = fadd float %196, %220 %225 = insertelement <4 x i32> undef, i32 %28, i32 0 %226 = insertelement <4 x i32> %225, i32 %29, i32 1 %227 = insertelement <4 x i32> %226, i32 0, i32 2 %228 = bitcast <8 x i32> %25 to <32 x i8> %229 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %227, <32 x i8> %228, i32 2) %230 = extractelement <4 x i32> %229, i32 0 %231 = lshr i32 %230, 28 %232 = extractelement <8 x i32> %25, i32 1 %233 = icmp ne i32 %232, 0 %234 = select i1 %233, i32 %231, i32 7 %235 = insertelement <4 x i32> undef, i32 %28, i32 0 %236 = insertelement <4 x i32> %235, i32 %29, i32 1 %237 = insertelement <4 x i32> %236, i32 %234, i32 2 %238 = bitcast <8 x i32> %23 to <32 x i8> %239 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %237, <32 x i8> %238, i32 14) %240 = extractelement <4 x i32> %239, i32 0 %241 = extractelement <4 x i32> %239, i32 1 %242 = extractelement <4 x i32> %239, i32 2 %243 = extractelement <4 x i32> %239, i32 3 %244 = bitcast i32 %240 to float %245 = bitcast i32 %241 to float %246 = bitcast i32 %242 to float %247 = bitcast i32 %243 to float %248 = fadd float %221, %244 %249 = fadd float %222, %245 %250 = fadd float %223, %246 %251 = fadd float %224, %247 %252 = fmul float %248, 1.250000e-01 %253 = fmul float %249, 1.250000e-01 %254 = fmul float %250, 1.250000e-01 %255 = fmul float %251, 1.250000e-01 %256 = call i32 @llvm.SI.packf16(float %252, float %253) %257 = bitcast i32 %256 to float %258 = call i32 @llvm.SI.packf16(float %254, float %255) %259 = bitcast i32 %258 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %257, float %259, float %257, float %259) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx8 s[12:19], s[6:7], 0x88 ; C0C60788 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx8 s[0:7], s[6:7], 0x0 ; C0C00700 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 v_cvt_u32_f32_e32 v1, v2 ; 7E020F02 v_cvt_u32_f32_e32 v2, v0 ; 7E040F00 v_mov_b32_e32 v3, 0 ; 7E060280 s_waitcnt lgkmcnt(0) ; BF8C007F image_load_mip v0, 1, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[12:19] ; F0040100 00030001 v_cmp_ne_i32_e64 s[8:9], 0, s13 ; D10A0008 00001A80 s_waitcnt vmcnt(0) ; BF8C0770 v_and_b32_e32 v5, 15, v0 ; 360A008F v_cndmask_b32_e64 v3, 0, v5, s[8:9] ; D2000003 00220A80 image_load v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000501 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v9, 0, v5 ; 06120A80 v_add_f32_e32 v10, 0, v6 ; 06140C80 v_add_f32_e32 v11, 0, v7 ; 06160E80 v_add_f32_e32 v5, 0, v8 ; 060A1080 v_bfe_u32 v6, v0, 4, 4 ; D2900006 02110900 v_cndmask_b32_e64 v3, 1, v6, s[8:9] ; D2000003 00220C81 image_load v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000C01 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v12, v9 ; 060C130C v_add_f32_e32 v7, v13, v10 ; 060E150D v_add_f32_e32 v8, v14, v11 ; 0610170E v_add_f32_e32 v5, v15, v5 ; 060A0B0F v_bfe_u32 v9, v0, 8, 4 ; D2900009 02111100 v_cndmask_b32_e64 v3, 2, v9, s[8:9] ; D2000003 00221282 image_load v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000901 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v9, v6 ; 060C0D09 v_add_f32_e32 v7, v10, v7 ; 060E0F0A v_add_f32_e32 v8, v11, v8 ; 0610110B v_add_f32_e32 v5, v12, v5 ; 060A0B0C v_bfe_u32 v9, v0, 12, 4 ; D2900009 02111900 v_cndmask_b32_e64 v3, 3, v9, s[8:9] ; D2000003 00221283 image_load v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000901 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v9, v6 ; 060C0D09 v_add_f32_e32 v7, v10, v7 ; 060E0F0A v_add_f32_e32 v8, v11, v8 ; 0610110B v_add_f32_e32 v5, v12, v5 ; 060A0B0C v_bfe_u32 v9, v0, 16, 4 ; D2900009 02112100 v_cndmask_b32_e64 v3, 4, v9, s[8:9] ; D2000003 00221284 v_bfe_u32 v9, v0, 20, 4 ; D2900009 02112900 image_load v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000A01 v_cndmask_b32_e64 v3, 5, v9, s[8:9] ; D2000003 00221285 v_bfe_u32 v9, v0, 24, 4 ; D2900009 02113100 image_load v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000E01 v_cndmask_b32_e64 v3, 6, v9, s[8:9] ; D2000003 00221286 v_lshrrev_b32_e32 v0, 28, v0 ; 2C00009C image_load v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00001201 v_cndmask_b32_e64 v3, 7, v0, s[8:9] ; D2000003 00220087 image_load v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000001 s_waitcnt vmcnt(3) ; BF8C0773 v_add_f32_e32 v4, v10, v6 ; 06080D0A v_add_f32_e32 v6, v11, v7 ; 060C0F0B v_add_f32_e32 v7, v12, v8 ; 060E110C v_add_f32_e32 v5, v13, v5 ; 060A0B0D s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v4, v14, v4 ; 0608090E v_add_f32_e32 v6, v15, v6 ; 060C0D0F v_add_f32_e32 v7, v16, v7 ; 060E0F10 v_add_f32_e32 v5, v17, v5 ; 060A0B11 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v4, v18, v4 ; 06080912 v_add_f32_e32 v6, v19, v6 ; 060C0D13 v_add_f32_e32 v7, v20, v7 ; 060E0F14 v_add_f32_e32 v5, v21, v5 ; 060A0B15 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v0, v4 ; 06080900 v_add_f32_e32 v6, v1, v6 ; 060C0D01 v_add_f32_e32 v7, v2, v7 ; 060E0F02 v_add_f32_e32 v0, v3, v5 ; 06000B03 v_mul_f32_e32 v1, 0x3e000000, v4 ; 100208FF 3E000000 v_mul_f32_e32 v2, 0x3e000000, v6 ; 10040CFF 3E000000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, 0x3e000000, v7 ; 10040EFF 3E000000 v_mul_f32_e32 v0, 0x3e000000, v0 ; 100000FF 3E000000 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 24 Code Size: 460 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v0, v1 ; 5E080300 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 exp 15, 0, 1, 1, 1, v4, v0, v4, v0 ; F8001C0F 00040004 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..2] IMM[0] FLT32 { 0.0000, 0.1250, 0.0000, 0.0000} IMM[1] UINT32 {0, 1, 2, 3} IMM[2] UINT32 {4, 5, 6, 7} 0: MOV TEMP[0], IMM[0].xxxx 1: F2U TEMP[1], IN[0] 2: MOV TEMP[1].w, IMM[1].xxxx 3: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 4: ADD TEMP[0], TEMP[0], TEMP[2] 5: MOV TEMP[1].w, IMM[1].yyyy 6: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 7: ADD TEMP[0], TEMP[0], TEMP[2] 8: MOV TEMP[1].w, IMM[1].zzzz 9: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 10: ADD TEMP[0], TEMP[0], TEMP[2] 11: MOV TEMP[1].w, IMM[1].wwww 12: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 13: ADD TEMP[0], TEMP[0], TEMP[2] 14: MOV TEMP[1].w, IMM[2].xxxx 15: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 16: ADD TEMP[0], TEMP[0], TEMP[2] 17: MOV TEMP[1].w, IMM[2].yyyy 18: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 19: ADD TEMP[0], TEMP[0], TEMP[2] 20: MOV TEMP[1].w, IMM[2].zzzz 21: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 22: ADD TEMP[0], TEMP[0], TEMP[2] 23: MOV TEMP[1].w, IMM[2].wwww 24: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 25: ADD TEMP[0], TEMP[0], TEMP[2] 26: MUL TEMP[0], TEMP[0], IMM[0].yyyy 27: MOV OUT[0], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 17 %25 = load <8 x i32>, <8 x i32> addrspace(2)* %24, align 32, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = fptoui float %26 to i32 %29 = fptoui float %27 to i32 %30 = insertelement <4 x i32> undef, i32 %28, i32 0 %31 = insertelement <4 x i32> %30, i32 %29, i32 1 %32 = insertelement <4 x i32> %31, i32 0, i32 2 %33 = bitcast <8 x i32> %25 to <32 x i8> %34 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %32, <32 x i8> %33, i32 2) %35 = extractelement <4 x i32> %34, i32 0 %36 = and i32 %35, 15 %37 = extractelement <8 x i32> %25, i32 1 %38 = icmp ne i32 %37, 0 %39 = select i1 %38, i32 %36, i32 0 %40 = insertelement <4 x i32> undef, i32 %28, i32 0 %41 = insertelement <4 x i32> %40, i32 %29, i32 1 %42 = insertelement <4 x i32> %41, i32 %39, i32 2 %43 = bitcast <8 x i32> %23 to <32 x i8> %44 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %42, <32 x i8> %43, i32 14) %45 = extractelement <4 x i32> %44, i32 0 %46 = extractelement <4 x i32> %44, i32 1 %47 = extractelement <4 x i32> %44, i32 2 %48 = extractelement <4 x i32> %44, i32 3 %49 = bitcast i32 %45 to float %50 = bitcast i32 %46 to float %51 = bitcast i32 %47 to float %52 = bitcast i32 %48 to float %53 = fadd float %49, 0.000000e+00 %54 = fadd float %50, 0.000000e+00 %55 = fadd float %51, 0.000000e+00 %56 = fadd float %52, 0.000000e+00 %57 = insertelement <4 x i32> undef, i32 %28, i32 0 %58 = insertelement <4 x i32> %57, i32 %29, i32 1 %59 = insertelement <4 x i32> %58, i32 0, i32 2 %60 = bitcast <8 x i32> %25 to <32 x i8> %61 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %59, <32 x i8> %60, i32 2) %62 = extractelement <4 x i32> %61, i32 0 %63 = lshr i32 %62, 4 %64 = and i32 %63, 15 %65 = extractelement <8 x i32> %25, i32 1 %66 = icmp ne i32 %65, 0 %67 = select i1 %66, i32 %64, i32 1 %68 = insertelement <4 x i32> undef, i32 %28, i32 0 %69 = insertelement <4 x i32> %68, i32 %29, i32 1 %70 = insertelement <4 x i32> %69, i32 %67, i32 2 %71 = bitcast <8 x i32> %23 to <32 x i8> %72 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %70, <32 x i8> %71, i32 14) %73 = extractelement <4 x i32> %72, i32 0 %74 = extractelement <4 x i32> %72, i32 1 %75 = extractelement <4 x i32> %72, i32 2 %76 = extractelement <4 x i32> %72, i32 3 %77 = bitcast i32 %73 to float %78 = bitcast i32 %74 to float %79 = bitcast i32 %75 to float %80 = bitcast i32 %76 to float %81 = fadd float %53, %77 %82 = fadd float %54, %78 %83 = fadd float %55, %79 %84 = fadd float %56, %80 %85 = insertelement <4 x i32> undef, i32 %28, i32 0 %86 = insertelement <4 x i32> %85, i32 %29, i32 1 %87 = insertelement <4 x i32> %86, i32 0, i32 2 %88 = bitcast <8 x i32> %25 to <32 x i8> %89 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %87, <32 x i8> %88, i32 2) %90 = extractelement <4 x i32> %89, i32 0 %91 = lshr i32 %90, 8 %92 = and i32 %91, 15 %93 = extractelement <8 x i32> %25, i32 1 %94 = icmp ne i32 %93, 0 %95 = select i1 %94, i32 %92, i32 2 %96 = insertelement <4 x i32> undef, i32 %28, i32 0 %97 = insertelement <4 x i32> %96, i32 %29, i32 1 %98 = insertelement <4 x i32> %97, i32 %95, i32 2 %99 = bitcast <8 x i32> %23 to <32 x i8> %100 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %98, <32 x i8> %99, i32 14) %101 = extractelement <4 x i32> %100, i32 0 %102 = extractelement <4 x i32> %100, i32 1 %103 = extractelement <4 x i32> %100, i32 2 %104 = extractelement <4 x i32> %100, i32 3 %105 = bitcast i32 %101 to float %106 = bitcast i32 %102 to float %107 = bitcast i32 %103 to float %108 = bitcast i32 %104 to float %109 = fadd float %81, %105 %110 = fadd float %82, %106 %111 = fadd float %83, %107 %112 = fadd float %84, %108 %113 = insertelement <4 x i32> undef, i32 %28, i32 0 %114 = insertelement <4 x i32> %113, i32 %29, i32 1 %115 = insertelement <4 x i32> %114, i32 0, i32 2 %116 = bitcast <8 x i32> %25 to <32 x i8> %117 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %115, <32 x i8> %116, i32 2) %118 = extractelement <4 x i32> %117, i32 0 %119 = lshr i32 %118, 12 %120 = and i32 %119, 15 %121 = extractelement <8 x i32> %25, i32 1 %122 = icmp ne i32 %121, 0 %123 = select i1 %122, i32 %120, i32 3 %124 = insertelement <4 x i32> undef, i32 %28, i32 0 %125 = insertelement <4 x i32> %124, i32 %29, i32 1 %126 = insertelement <4 x i32> %125, i32 %123, i32 2 %127 = bitcast <8 x i32> %23 to <32 x i8> %128 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %126, <32 x i8> %127, i32 14) %129 = extractelement <4 x i32> %128, i32 0 %130 = extractelement <4 x i32> %128, i32 1 %131 = extractelement <4 x i32> %128, i32 2 %132 = extractelement <4 x i32> %128, i32 3 %133 = bitcast i32 %129 to float %134 = bitcast i32 %130 to float %135 = bitcast i32 %131 to float %136 = bitcast i32 %132 to float %137 = fadd float %109, %133 %138 = fadd float %110, %134 %139 = fadd float %111, %135 %140 = fadd float %112, %136 %141 = insertelement <4 x i32> undef, i32 %28, i32 0 %142 = insertelement <4 x i32> %141, i32 %29, i32 1 %143 = insertelement <4 x i32> %142, i32 0, i32 2 %144 = bitcast <8 x i32> %25 to <32 x i8> %145 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %143, <32 x i8> %144, i32 2) %146 = extractelement <4 x i32> %145, i32 0 %147 = lshr i32 %146, 16 %148 = and i32 %147, 15 %149 = extractelement <8 x i32> %25, i32 1 %150 = icmp ne i32 %149, 0 %151 = select i1 %150, i32 %148, i32 4 %152 = insertelement <4 x i32> undef, i32 %28, i32 0 %153 = insertelement <4 x i32> %152, i32 %29, i32 1 %154 = insertelement <4 x i32> %153, i32 %151, i32 2 %155 = bitcast <8 x i32> %23 to <32 x i8> %156 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %154, <32 x i8> %155, i32 14) %157 = extractelement <4 x i32> %156, i32 0 %158 = extractelement <4 x i32> %156, i32 1 %159 = extractelement <4 x i32> %156, i32 2 %160 = extractelement <4 x i32> %156, i32 3 %161 = bitcast i32 %157 to float %162 = bitcast i32 %158 to float %163 = bitcast i32 %159 to float %164 = bitcast i32 %160 to float %165 = fadd float %137, %161 %166 = fadd float %138, %162 %167 = fadd float %139, %163 %168 = fadd float %140, %164 %169 = insertelement <4 x i32> undef, i32 %28, i32 0 %170 = insertelement <4 x i32> %169, i32 %29, i32 1 %171 = insertelement <4 x i32> %170, i32 0, i32 2 %172 = bitcast <8 x i32> %25 to <32 x i8> %173 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %171, <32 x i8> %172, i32 2) %174 = extractelement <4 x i32> %173, i32 0 %175 = lshr i32 %174, 20 %176 = and i32 %175, 15 %177 = extractelement <8 x i32> %25, i32 1 %178 = icmp ne i32 %177, 0 %179 = select i1 %178, i32 %176, i32 5 %180 = insertelement <4 x i32> undef, i32 %28, i32 0 %181 = insertelement <4 x i32> %180, i32 %29, i32 1 %182 = insertelement <4 x i32> %181, i32 %179, i32 2 %183 = bitcast <8 x i32> %23 to <32 x i8> %184 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %182, <32 x i8> %183, i32 14) %185 = extractelement <4 x i32> %184, i32 0 %186 = extractelement <4 x i32> %184, i32 1 %187 = extractelement <4 x i32> %184, i32 2 %188 = extractelement <4 x i32> %184, i32 3 %189 = bitcast i32 %185 to float %190 = bitcast i32 %186 to float %191 = bitcast i32 %187 to float %192 = bitcast i32 %188 to float %193 = fadd float %165, %189 %194 = fadd float %166, %190 %195 = fadd float %167, %191 %196 = fadd float %168, %192 %197 = insertelement <4 x i32> undef, i32 %28, i32 0 %198 = insertelement <4 x i32> %197, i32 %29, i32 1 %199 = insertelement <4 x i32> %198, i32 0, i32 2 %200 = bitcast <8 x i32> %25 to <32 x i8> %201 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %199, <32 x i8> %200, i32 2) %202 = extractelement <4 x i32> %201, i32 0 %203 = lshr i32 %202, 24 %204 = and i32 %203, 15 %205 = extractelement <8 x i32> %25, i32 1 %206 = icmp ne i32 %205, 0 %207 = select i1 %206, i32 %204, i32 6 %208 = insertelement <4 x i32> undef, i32 %28, i32 0 %209 = insertelement <4 x i32> %208, i32 %29, i32 1 %210 = insertelement <4 x i32> %209, i32 %207, i32 2 %211 = bitcast <8 x i32> %23 to <32 x i8> %212 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %210, <32 x i8> %211, i32 14) %213 = extractelement <4 x i32> %212, i32 0 %214 = extractelement <4 x i32> %212, i32 1 %215 = extractelement <4 x i32> %212, i32 2 %216 = extractelement <4 x i32> %212, i32 3 %217 = bitcast i32 %213 to float %218 = bitcast i32 %214 to float %219 = bitcast i32 %215 to float %220 = bitcast i32 %216 to float %221 = fadd float %193, %217 %222 = fadd float %194, %218 %223 = fadd float %195, %219 %224 = fadd float %196, %220 %225 = insertelement <4 x i32> undef, i32 %28, i32 0 %226 = insertelement <4 x i32> %225, i32 %29, i32 1 %227 = insertelement <4 x i32> %226, i32 0, i32 2 %228 = bitcast <8 x i32> %25 to <32 x i8> %229 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %227, <32 x i8> %228, i32 2) %230 = extractelement <4 x i32> %229, i32 0 %231 = lshr i32 %230, 28 %232 = extractelement <8 x i32> %25, i32 1 %233 = icmp ne i32 %232, 0 %234 = select i1 %233, i32 %231, i32 7 %235 = insertelement <4 x i32> undef, i32 %28, i32 0 %236 = insertelement <4 x i32> %235, i32 %29, i32 1 %237 = insertelement <4 x i32> %236, i32 %234, i32 2 %238 = bitcast <8 x i32> %23 to <32 x i8> %239 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %237, <32 x i8> %238, i32 14) %240 = extractelement <4 x i32> %239, i32 0 %241 = extractelement <4 x i32> %239, i32 1 %242 = extractelement <4 x i32> %239, i32 2 %243 = extractelement <4 x i32> %239, i32 3 %244 = bitcast i32 %240 to float %245 = bitcast i32 %241 to float %246 = bitcast i32 %242 to float %247 = bitcast i32 %243 to float %248 = fadd float %221, %244 %249 = fadd float %222, %245 %250 = fadd float %223, %246 %251 = fadd float %224, %247 %252 = fmul float %248, 1.250000e-01 %253 = fmul float %249, 1.250000e-01 %254 = fmul float %250, 1.250000e-01 %255 = fmul float %251, 1.250000e-01 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %252, float %253, float %254, float %255) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx8 s[12:19], s[6:7], 0x88 ; C0C60788 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx8 s[0:7], s[6:7], 0x0 ; C0C00700 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 v_cvt_u32_f32_e32 v1, v2 ; 7E020F02 v_cvt_u32_f32_e32 v2, v0 ; 7E040F00 v_mov_b32_e32 v3, 0 ; 7E060280 s_waitcnt lgkmcnt(0) ; BF8C007F image_load_mip v0, 1, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[12:19] ; F0040100 00030001 v_cmp_ne_i32_e64 s[8:9], 0, s13 ; D10A0008 00001A80 s_waitcnt vmcnt(0) ; BF8C0770 v_and_b32_e32 v5, 15, v0 ; 360A008F v_cndmask_b32_e64 v3, 0, v5, s[8:9] ; D2000003 00220A80 image_load v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000501 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v9, 0, v5 ; 06120A80 v_add_f32_e32 v10, 0, v6 ; 06140C80 v_add_f32_e32 v11, 0, v7 ; 06160E80 v_add_f32_e32 v5, 0, v8 ; 060A1080 v_bfe_u32 v6, v0, 4, 4 ; D2900006 02110900 v_cndmask_b32_e64 v3, 1, v6, s[8:9] ; D2000003 00220C81 image_load v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000C01 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v12, v9 ; 060C130C v_add_f32_e32 v7, v13, v10 ; 060E150D v_add_f32_e32 v8, v14, v11 ; 0610170E v_add_f32_e32 v5, v15, v5 ; 060A0B0F v_bfe_u32 v9, v0, 8, 4 ; D2900009 02111100 v_cndmask_b32_e64 v3, 2, v9, s[8:9] ; D2000003 00221282 image_load v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000901 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v9, v6 ; 060C0D09 v_add_f32_e32 v7, v10, v7 ; 060E0F0A v_add_f32_e32 v8, v11, v8 ; 0610110B v_add_f32_e32 v5, v12, v5 ; 060A0B0C v_bfe_u32 v9, v0, 12, 4 ; D2900009 02111900 v_cndmask_b32_e64 v3, 3, v9, s[8:9] ; D2000003 00221283 image_load v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000901 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v6, v9, v6 ; 060C0D09 v_add_f32_e32 v7, v10, v7 ; 060E0F0A v_add_f32_e32 v8, v11, v8 ; 0610110B v_add_f32_e32 v5, v12, v5 ; 060A0B0C v_bfe_u32 v9, v0, 16, 4 ; D2900009 02112100 v_cndmask_b32_e64 v3, 4, v9, s[8:9] ; D2000003 00221284 v_bfe_u32 v9, v0, 20, 4 ; D2900009 02112900 image_load v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000A01 v_cndmask_b32_e64 v3, 5, v9, s[8:9] ; D2000003 00221285 v_bfe_u32 v9, v0, 24, 4 ; D2900009 02113100 image_load v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000E01 v_cndmask_b32_e64 v3, 6, v9, s[8:9] ; D2000003 00221286 v_lshrrev_b32_e32 v0, 28, v0 ; 2C00009C image_load v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00001201 v_cndmask_b32_e64 v3, 7, v0, s[8:9] ; D2000003 00220087 image_load v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000F00 00000001 s_waitcnt vmcnt(3) ; BF8C0773 v_add_f32_e32 v4, v10, v6 ; 06080D0A v_add_f32_e32 v6, v11, v7 ; 060C0F0B v_add_f32_e32 v7, v12, v8 ; 060E110C v_add_f32_e32 v5, v13, v5 ; 060A0B0D s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v4, v14, v4 ; 0608090E v_add_f32_e32 v6, v15, v6 ; 060C0D0F v_add_f32_e32 v7, v16, v7 ; 060E0F10 v_add_f32_e32 v5, v17, v5 ; 060A0B11 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v4, v18, v4 ; 06080912 v_add_f32_e32 v6, v19, v6 ; 060C0D13 v_add_f32_e32 v7, v20, v7 ; 060E0F14 v_add_f32_e32 v5, v21, v5 ; 060A0B15 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v0, v4 ; 06080900 v_add_f32_e32 v6, v1, v6 ; 060C0D01 v_add_f32_e32 v7, v2, v7 ; 060E0F02 v_add_f32_e32 v0, v3, v5 ; 06000B03 v_mul_f32_e32 v1, 0x3e000000, v4 ; 100208FF 3E000000 v_mul_f32_e32 v2, 0x3e000000, v6 ; 10040CFF 3E000000 v_mul_f32_e32 v3, 0x3e000000, v7 ; 10060EFF 3E000000 v_mul_f32_e32 v0, 0x3e000000, v0 ; 100000FF 3E000000 exp 15, 0, 0, 1, 1, v1, v2, v3, v0 ; F800180F 00030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 24 Code Size: 452 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** Probe color at (12,12) Expected: 0.000000 1.000000 0.000000 0.000000 Observed: 0.000000 0.000000 0.000000 1.000000 SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %23, float %24, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 32 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL OUT[1], POSITION DCL SAMP[0] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: TEX OUT[1].z, IN[0], SAMP[0], 2D 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 2 call void @llvm.SI.export(i32 1, i32 0, i32 0, i32 8, i32 0, float %33, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_mov_b32_e32 v0, 0 ; 7E000280 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 4, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800400 00030102 s_waitcnt vmcnt(0) ; BF8C0770 exp 1, 8, 0, 0, 0, v1, v0, v0, v0 ; F8000081 00000001 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 0, 0, 1, 1, v0, v0, v0, v1 ; F800180F 01000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG 0: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } Shader Disassembly: v_mov_b32_e32 v0, 0 ; 7E000280 exp 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 16 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], COLOR 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: MOV OUT[2], IN[2] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 %27 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = extractelement <4 x float> %30, i32 1 %33 = extractelement <4 x float> %30, i32 2 %34 = extractelement <4 x float> %30, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %31, float %32, float %33, float %34) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v9, v10, v11, v12 ; F800021F 0C0B0A09 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], POSITION DCL OUT[1], COLOR DCL SAMP[0] 0: TEX OUT[0].z, IN[1], SAMP[0], 2D 1: MOV OUT[1], IN[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2) %37 = extractelement <4 x float> %36, i32 2 %38 = call i32 @llvm.SI.packf16(float %26, float %27) %39 = bitcast i32 %38 to float %40 = call i32 @llvm.SI.packf16(float %28, float %29) %41 = bitcast i32 %40 to float call void @llvm.SI.export(i32 1, i32 0, i32 0, i32 8, i32 0, float %37, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %39, float %41, float %39, float %41) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 4, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800400 00020006 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v2, v4, v5 ; 5E040B04 v_mov_b32_e32 v3, 0 ; 7E060280 s_waitcnt vmcnt(0) ; BF8C0770 exp 1, 8, 0, 0, 0, v0, v3, v3, v3 ; F8000081 03030300 exp 15, 0, 1, 1, 1, v1, v2, v1, v2 ; F8001C0F 02010201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 112 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL SAMP[0] DCL OUT[0], POSITION DCL TEMP[0] 0: F2U TEMP[0], IN[0] 1: TXF OUT[0].z, TEMP[0], SAMP[0], 2D_MSAA 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 17 %25 = load <8 x i32>, <8 x i32> addrspace(2)* %24, align 32, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %11) %29 = fptoui float %26 to i32 %30 = fptoui float %27 to i32 %31 = fptoui float %28 to i32 %32 = insertelement <4 x i32> undef, i32 %29, i32 0 %33 = insertelement <4 x i32> %32, i32 %30, i32 1 %34 = insertelement <4 x i32> %33, i32 0, i32 2 %35 = bitcast <8 x i32> %25 to <32 x i8> %36 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %34, <32 x i8> %35, i32 2) %37 = extractelement <4 x i32> %36, i32 0 %38 = shl i32 %31, 2 %39 = lshr i32 %37, %38 %40 = and i32 %39, 15 %41 = extractelement <8 x i32> %25, i32 1 %42 = icmp ne i32 %41, 0 %43 = select i1 %42, i32 %40, i32 %31 %44 = insertelement <4 x i32> undef, i32 %29, i32 0 %45 = insertelement <4 x i32> %44, i32 %30, i32 1 %46 = insertelement <4 x i32> %45, i32 %43, i32 2 %47 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %46, <32 x i8> %23, i32 14) %48 = extractelement <4 x i32> %47, i32 2 %49 = bitcast i32 %48 to float call void @llvm.SI.export(i32 1, i32 1, i32 1, i32 8, i32 0, float %49, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx8 s[8:15], s[6:7], 0x88 ; C0C40788 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_cvt_u32_f32_e32 v4, v2 ; 7E080F02 s_load_dwordx8 s[0:7], s[6:7], 0x0 ; C0C00700 v_cvt_u32_f32_e32 v5, v3 ; 7E0A0F03 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt lgkmcnt(0) ; BF8C007F image_load_mip v1, 1, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[8:15] ; F0040100 00020104 v_cmp_ne_i32_e64 s[8:9], 0, s9 ; D10A0008 00001280 v_lshlrev_b32_e32 v2, 2, v0 ; 34040082 s_waitcnt vmcnt(0) ; BF8C0770 v_bfe_u32 v1, v1, v2, 4 ; D2900001 02120501 v_cndmask_b32_e64 v0, v0, v1, s[8:9] ; D2000000 00220300 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v5 ; 7E120305 v_mov_b32_e32 v10, v6 ; 7E140306 v_mov_b32_e32 v11, v7 ; 7E160307 v_mov_b32_e32 v10, v0 ; 7E140300 image_load v0, 4, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[0:7] ; F0000400 00000008 s_waitcnt vmcnt(0) ; BF8C0770 exp 1, 8, 0, 1, 1, v0, v6, v6, v6 ; F8001881 06060600 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 140 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], STENCIL DCL SAMP[0] DCL SAMP[1] 0: TEX OUT[0].y, IN[1], SAMP[1], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %23 = bitcast <8 x i32> addrspace(2)* %22 to <32 x i8> addrspace(2)* %24 = load <32 x i8>, <32 x i8> addrspace(2)* %23, align 32, !tbaa !0 %25 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %26 = bitcast <4 x i32> addrspace(2)* %25 to <16 x i8> addrspace(2)* %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %30 = bitcast float %28 to i32 %31 = bitcast float %29 to i32 %32 = insertelement <2 x i32> undef, i32 %30, i32 0 %33 = insertelement <2 x i32> %32, i32 %31, i32 1 %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %33, <32 x i8> %24, <16 x i8> %27, i32 2) %35 = extractelement <4 x float> %34, i32 1 call void @llvm.SI.export(i32 2, i32 1, i32 1, i32 8, i32 0, float 0.000000e+00, float %35, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 2, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800200 00030002 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt vmcnt(0) ; BF8C0770 exp 2, 8, 0, 1, 1, v1, v0, v1, v1 ; F8001882 01010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 64 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL SAMP[0..1] DCL OUT[0], POSITION DCL OUT[1], STENCIL DCL TEMP[0] 0: F2U TEMP[0], IN[0] 1: TXF OUT[0].z, TEMP[0], SAMP[0], 2D_MSAA 2: TXF OUT[1].y, TEMP[0], SAMP[1], 2D_MSAA 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 17 %25 = load <8 x i32>, <8 x i32> addrspace(2)* %24, align 32, !tbaa !0 %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %27 = bitcast <8 x i32> addrspace(2)* %26 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 18 %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %33 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %11) %34 = fptoui float %31 to i32 %35 = fptoui float %32 to i32 %36 = fptoui float %33 to i32 %37 = insertelement <4 x i32> undef, i32 %34, i32 0 %38 = insertelement <4 x i32> %37, i32 %35, i32 1 %39 = insertelement <4 x i32> %38, i32 0, i32 2 %40 = bitcast <8 x i32> %25 to <32 x i8> %41 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %39, <32 x i8> %40, i32 2) %42 = extractelement <4 x i32> %41, i32 0 %43 = shl i32 %36, 2 %44 = lshr i32 %42, %43 %45 = and i32 %44, 15 %46 = extractelement <8 x i32> %25, i32 1 %47 = icmp ne i32 %46, 0 %48 = select i1 %47, i32 %45, i32 %36 %49 = insertelement <4 x i32> undef, i32 %34, i32 0 %50 = insertelement <4 x i32> %49, i32 %35, i32 1 %51 = insertelement <4 x i32> %50, i32 %48, i32 2 %52 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %51, <32 x i8> %23, i32 14) %53 = extractelement <4 x i32> %52, i32 2 %54 = bitcast i32 %53 to float %55 = insertelement <4 x i32> undef, i32 %34, i32 0 %56 = insertelement <4 x i32> %55, i32 %35, i32 1 %57 = insertelement <4 x i32> %56, i32 0, i32 2 %58 = bitcast <8 x i32> %30 to <32 x i8> %59 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %57, <32 x i8> %58, i32 2) %60 = extractelement <4 x i32> %59, i32 0 %61 = shl i32 %36, 2 %62 = lshr i32 %60, %61 %63 = and i32 %62, 15 %64 = extractelement <8 x i32> %30, i32 1 %65 = icmp ne i32 %64, 0 %66 = select i1 %65, i32 %63, i32 %36 %67 = insertelement <4 x i32> undef, i32 %34, i32 0 %68 = insertelement <4 x i32> %67, i32 %35, i32 1 %69 = insertelement <4 x i32> %68, i32 %66, i32 2 %70 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %69, <32 x i8> %28, i32 14) %71 = extractelement <4 x i32> %70, i32 1 %72 = bitcast i32 %71 to float call void @llvm.SI.export(i32 3, i32 1, i32 1, i32 8, i32 0, float %54, float %72, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx8 s[8:15], s[6:7], 0x88 ; C0C40788 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_cvt_u32_f32_e32 v4, v2 ; 7E080F02 v_cvt_u32_f32_e32 v5, v3 ; 7E0A0F03 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x90 ; C0CC0790 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt lgkmcnt(0) ; BF8C007F image_load_mip v1, 1, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[8:15] ; F0040100 00020104 v_cmp_ne_i32_e64 s[0:1], 0, s9 ; D10A0000 00001280 v_lshlrev_b32_e32 v2, 2, v0 ; 34040082 s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708 s_waitcnt vmcnt(0) ; BF8C0770 v_bfe_u32 v1, v1, v2, 4 ; D2900001 02120501 v_cndmask_b32_e64 v1, v0, v1, s[0:1] ; D2000001 00020300 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v5 ; 7E120305 v_mov_b32_e32 v10, v6 ; 7E140306 v_mov_b32_e32 v11, v7 ; 7E160307 v_mov_b32_e32 v10, v1 ; 7E140301 image_load v1, 4, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[16:23] ; F0000400 00040108 image_load_mip v3, 1, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[24:31] ; F0040100 00060304 v_cmp_ne_i32_e64 s[0:1], 0, s25 ; D10A0000 00003280 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_bfe_u32 v2, v3, v2, 4 ; D2900002 02120503 v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; D2000000 00020500 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v5 ; 7E120305 v_mov_b32_e32 v10, v6 ; 7E140306 v_mov_b32_e32 v11, v7 ; 7E160307 v_mov_b32_e32 v10, v0 ; 7E140300 image_load v0, 2, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[4:11] ; F0000200 00010008 s_waitcnt vmcnt(0) ; BF8C0770 exp 3, 8, 0, 1, 1, v1, v0, v6, v6 ; F8001883 06060001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 212 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL OUT[1], POSITION DCL OUT[2], STENCIL DCL SAMP[0] DCL SAMP[1] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: TEX OUT[1].z, IN[0], SAMP[0], 2D 2: TEX OUT[2].y, IN[0], SAMP[1], 2D 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %27 = bitcast <8 x i32> addrspace(2)* %26 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %30 = bitcast <4 x i32> addrspace(2)* %29 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %34 = bitcast float %32 to i32 %35 = bitcast float %33 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %23, <16 x i8> %25, i32 2) %39 = extractelement <4 x float> %38, i32 2 %40 = bitcast float %32 to i32 %41 = bitcast float %33 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %28, <16 x i8> %31, i32 2) %45 = extractelement <4 x float> %44, i32 1 call void @llvm.SI.export(i32 3, i32 0, i32 0, i32 8, i32 0, float %39, float %45, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0 ; 7E040280 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 4, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[0:3] ; F0800400 00030004 image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[20:27], s[8:11] ; F0800200 00450104 s_waitcnt vmcnt(0) ; BF8C0770 exp 3, 8, 0, 0, 0, v0, v1, v2, v2 ; F8000083 02020100 exp 15, 0, 0, 1, 1, v2, v2, v2, v3 ; F800180F 03020202 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 92 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** PIGLIT: {"result": "fail" }