VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %33, %13 %38 = fmul float %33, %14 %39 = fmul float %33, %15 %40 = fmul float %33, %16 %41 = fmul float %34, %17 %42 = fadd float %41, %37 %43 = fmul float %34, %18 %44 = fadd float %43, %38 %45 = fmul float %34, %19 %46 = fadd float %45, %39 %47 = fmul float %34, %20 %48 = fadd float %47, %40 %49 = fmul float %35, %21 %50 = fadd float %49, %42 %51 = fmul float %35, %22 %52 = fadd float %51, %44 %53 = fmul float %35, %23 %54 = fadd float %53, %46 %55 = fmul float %35, %24 %56 = fadd float %55, %48 %57 = fmul float %36, %25 %58 = fadd float %57, %50 %59 = fmul float %36, %26 %60 = fadd float %59, %52 %61 = fmul float %36, %27 %62 = fadd float %61, %54 %63 = fmul float %36, %28 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mad_f32 v4, v1, s8, v4 ; D2820004 04101101 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mad_f32 v5, v1, s9, v5 ; D2820005 04141301 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mad_f32 v6, v1, s10, v6 ; D2820006 04181501 v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mad_f32 v0, v1, s11, v0 ; D2820000 04001701 v_mad_f32 v1, v2, s12, v4 ; D2820001 04101902 v_mad_f32 v4, v2, s13, v5 ; D2820004 04141B02 v_mad_f32 v5, v2, s14, v6 ; D2820005 04181D02 v_mad_f32 v0, v2, s15, v0 ; D2820000 04001F02 v_mad_f32 v1, v3, s16, v1 ; D2820001 04042103 v_mad_f32 v2, v3, s17, v4 ; D2820002 04102303 v_mad_f32 v4, v3, s18, v5 ; D2820004 04142503 v_mad_f32 v0, v3, s0, v0 ; D2820000 04000103 exp 15, 12, 0, 1, 0, v1, v2, v4, v0 ; F80008CF 00040201 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL CONST[0..3] 0: MOV OUT[0], CONST[3] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..2] IMM[0] FLT32 { 0.0000, 0.1250, 0.0000, 0.0000} IMM[1] UINT32 {0, 1, 2, 3} IMM[2] UINT32 {4, 5, 6, 7} 0: MOV TEMP[0], IMM[0].xxxx 1: F2U TEMP[1], IN[0] 2: MOV TEMP[1].w, IMM[1].xxxx 3: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 4: ADD TEMP[0], TEMP[0], TEMP[2] 5: MOV TEMP[1].w, IMM[1].yyyy 6: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 7: ADD TEMP[0], TEMP[0], TEMP[2] 8: MOV TEMP[1].w, IMM[1].zzzz 9: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 10: ADD TEMP[0], TEMP[0], TEMP[2] 11: MOV TEMP[1].w, IMM[1].wwww 12: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 13: ADD TEMP[0], TEMP[0], TEMP[2] 14: MOV TEMP[1].w, IMM[2].xxxx 15: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 16: ADD TEMP[0], TEMP[0], TEMP[2] 17: MOV TEMP[1].w, IMM[2].yyyy 18: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 19: ADD TEMP[0], TEMP[0], TEMP[2] 20: MOV TEMP[1].w, IMM[2].zzzz 21: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 22: ADD TEMP[0], TEMP[0], TEMP[2] 23: MOV TEMP[1].w, IMM[2].wwww 24: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 25: ADD TEMP[0], TEMP[0], TEMP[2] 26: MUL TEMP[0], TEMP[0], IMM[0].yyyy 27: MOV OUT[0], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 17 %25 = load <8 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = fptoui float %26 to i32 %29 = fptoui float %27 to i32 %30 = bitcast i32 %28 to float %31 = bitcast i32 %29 to float %32 = bitcast float 0.000000e+00 to i32 %33 = bitcast float %30 to i32 %34 = bitcast float %31 to i32 %35 = insertelement <4 x i32> undef, i32 %33, i32 0 %36 = insertelement <4 x i32> %35, i32 %34, i32 1 %37 = insertelement <4 x i32> %36, i32 0, i32 2 %38 = insertelement <4 x i32> %37, i32 0, i32 3 %39 = bitcast <8 x i32> %25 to <32 x i8> %40 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %38, <32 x i8> %39, i32 2) %41 = extractelement <4 x i32> %40, i32 0 %42 = mul i32 %32, 4 %43 = lshr i32 %41, %42 %44 = and i32 %43, 15 %45 = extractelement <8 x i32> %25, i32 1 %46 = icmp ne i32 %45, 0 %47 = select i1 %46, i32 %44, i32 %32 %48 = insertelement <4 x i32> undef, i32 %33, i32 0 %49 = insertelement <4 x i32> %48, i32 %34, i32 1 %50 = insertelement <4 x i32> %49, i32 %47, i32 2 %51 = insertelement <4 x i32> %50, i32 0, i32 3 %52 = bitcast <8 x i32> %23 to <32 x i8> %53 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %51, <32 x i8> %52, i32 14) %54 = extractelement <4 x i32> %53, i32 0 %55 = extractelement <4 x i32> %53, i32 1 %56 = extractelement <4 x i32> %53, i32 2 %57 = extractelement <4 x i32> %53, i32 3 %58 = bitcast i32 %54 to float %59 = bitcast i32 %55 to float %60 = bitcast i32 %56 to float %61 = bitcast i32 %57 to float %62 = fadd float 0.000000e+00, %58 %63 = fadd float 0.000000e+00, %59 %64 = fadd float 0.000000e+00, %60 %65 = fadd float 0.000000e+00, %61 %66 = bitcast float 0x36A0000000000000 to i32 %67 = bitcast float %30 to i32 %68 = bitcast float %31 to i32 %69 = insertelement <4 x i32> undef, i32 %67, i32 0 %70 = insertelement <4 x i32> %69, i32 %68, i32 1 %71 = insertelement <4 x i32> %70, i32 0, i32 2 %72 = insertelement <4 x i32> %71, i32 0, i32 3 %73 = bitcast <8 x i32> %25 to <32 x i8> %74 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %72, <32 x i8> %73, i32 2) %75 = extractelement <4 x i32> %74, i32 0 %76 = mul i32 %66, 4 %77 = lshr i32 %75, %76 %78 = and i32 %77, 15 %79 = extractelement <8 x i32> %25, i32 1 %80 = icmp ne i32 %79, 0 %81 = select i1 %80, i32 %78, i32 %66 %82 = insertelement <4 x i32> undef, i32 %67, i32 0 %83 = insertelement <4 x i32> %82, i32 %68, i32 1 %84 = insertelement <4 x i32> %83, i32 %81, i32 2 %85 = insertelement <4 x i32> %84, i32 0, i32 3 %86 = bitcast <8 x i32> %23 to <32 x i8> %87 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %85, <32 x i8> %86, i32 14) %88 = extractelement <4 x i32> %87, i32 0 %89 = extractelement <4 x i32> %87, i32 1 %90 = extractelement <4 x i32> %87, i32 2 %91 = extractelement <4 x i32> %87, i32 3 %92 = bitcast i32 %88 to float %93 = bitcast i32 %89 to float %94 = bitcast i32 %90 to float %95 = bitcast i32 %91 to float %96 = fadd float %62, %92 %97 = fadd float %63, %93 %98 = fadd float %64, %94 %99 = fadd float %65, %95 %100 = bitcast float 0x36B0000000000000 to i32 %101 = bitcast float %30 to i32 %102 = bitcast float %31 to i32 %103 = insertelement <4 x i32> undef, i32 %101, i32 0 %104 = insertelement <4 x i32> %103, i32 %102, i32 1 %105 = insertelement <4 x i32> %104, i32 0, i32 2 %106 = insertelement <4 x i32> %105, i32 0, i32 3 %107 = bitcast <8 x i32> %25 to <32 x i8> %108 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %106, <32 x i8> %107, i32 2) %109 = extractelement <4 x i32> %108, i32 0 %110 = mul i32 %100, 4 %111 = lshr i32 %109, %110 %112 = and i32 %111, 15 %113 = extractelement <8 x i32> %25, i32 1 %114 = icmp ne i32 %113, 0 %115 = select i1 %114, i32 %112, i32 %100 %116 = insertelement <4 x i32> undef, i32 %101, i32 0 %117 = insertelement <4 x i32> %116, i32 %102, i32 1 %118 = insertelement <4 x i32> %117, i32 %115, i32 2 %119 = insertelement <4 x i32> %118, i32 0, i32 3 %120 = bitcast <8 x i32> %23 to <32 x i8> %121 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %119, <32 x i8> %120, i32 14) %122 = extractelement <4 x i32> %121, i32 0 %123 = extractelement <4 x i32> %121, i32 1 %124 = extractelement <4 x i32> %121, i32 2 %125 = extractelement <4 x i32> %121, i32 3 %126 = bitcast i32 %122 to float %127 = bitcast i32 %123 to float %128 = bitcast i32 %124 to float %129 = bitcast i32 %125 to float %130 = fadd float %96, %126 %131 = fadd float %97, %127 %132 = fadd float %98, %128 %133 = fadd float %99, %129 %134 = bitcast float 0x36B8000000000000 to i32 %135 = bitcast float %30 to i32 %136 = bitcast float %31 to i32 %137 = insertelement <4 x i32> undef, i32 %135, i32 0 %138 = insertelement <4 x i32> %137, i32 %136, i32 1 %139 = insertelement <4 x i32> %138, i32 0, i32 2 %140 = insertelement <4 x i32> %139, i32 0, i32 3 %141 = bitcast <8 x i32> %25 to <32 x i8> %142 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %140, <32 x i8> %141, i32 2) %143 = extractelement <4 x i32> %142, i32 0 %144 = mul i32 %134, 4 %145 = lshr i32 %143, %144 %146 = and i32 %145, 15 %147 = extractelement <8 x i32> %25, i32 1 %148 = icmp ne i32 %147, 0 %149 = select i1 %148, i32 %146, i32 %134 %150 = insertelement <4 x i32> undef, i32 %135, i32 0 %151 = insertelement <4 x i32> %150, i32 %136, i32 1 %152 = insertelement <4 x i32> %151, i32 %149, i32 2 %153 = insertelement <4 x i32> %152, i32 0, i32 3 %154 = bitcast <8 x i32> %23 to <32 x i8> %155 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %153, <32 x i8> %154, i32 14) %156 = extractelement <4 x i32> %155, i32 0 %157 = extractelement <4 x i32> %155, i32 1 %158 = extractelement <4 x i32> %155, i32 2 %159 = extractelement <4 x i32> %155, i32 3 %160 = bitcast i32 %156 to float %161 = bitcast i32 %157 to float %162 = bitcast i32 %158 to float %163 = bitcast i32 %159 to float %164 = fadd float %130, %160 %165 = fadd float %131, %161 %166 = fadd float %132, %162 %167 = fadd float %133, %163 %168 = bitcast float 0x36C0000000000000 to i32 %169 = bitcast float %30 to i32 %170 = bitcast float %31 to i32 %171 = insertelement <4 x i32> undef, i32 %169, i32 0 %172 = insertelement <4 x i32> %171, i32 %170, i32 1 %173 = insertelement <4 x i32> %172, i32 0, i32 2 %174 = insertelement <4 x i32> %173, i32 0, i32 3 %175 = bitcast <8 x i32> %25 to <32 x i8> %176 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %174, <32 x i8> %175, i32 2) %177 = extractelement <4 x i32> %176, i32 0 %178 = mul i32 %168, 4 %179 = lshr i32 %177, %178 %180 = and i32 %179, 15 %181 = extractelement <8 x i32> %25, i32 1 %182 = icmp ne i32 %181, 0 %183 = select i1 %182, i32 %180, i32 %168 %184 = insertelement <4 x i32> undef, i32 %169, i32 0 %185 = insertelement <4 x i32> %184, i32 %170, i32 1 %186 = insertelement <4 x i32> %185, i32 %183, i32 2 %187 = insertelement <4 x i32> %186, i32 0, i32 3 %188 = bitcast <8 x i32> %23 to <32 x i8> %189 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %187, <32 x i8> %188, i32 14) %190 = extractelement <4 x i32> %189, i32 0 %191 = extractelement <4 x i32> %189, i32 1 %192 = extractelement <4 x i32> %189, i32 2 %193 = extractelement <4 x i32> %189, i32 3 %194 = bitcast i32 %190 to float %195 = bitcast i32 %191 to float %196 = bitcast i32 %192 to float %197 = bitcast i32 %193 to float %198 = fadd float %164, %194 %199 = fadd float %165, %195 %200 = fadd float %166, %196 %201 = fadd float %167, %197 %202 = bitcast float 0x36C4000000000000 to i32 %203 = bitcast float %30 to i32 %204 = bitcast float %31 to i32 %205 = insertelement <4 x i32> undef, i32 %203, i32 0 %206 = insertelement <4 x i32> %205, i32 %204, i32 1 %207 = insertelement <4 x i32> %206, i32 0, i32 2 %208 = insertelement <4 x i32> %207, i32 0, i32 3 %209 = bitcast <8 x i32> %25 to <32 x i8> %210 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %208, <32 x i8> %209, i32 2) %211 = extractelement <4 x i32> %210, i32 0 %212 = mul i32 %202, 4 %213 = lshr i32 %211, %212 %214 = and i32 %213, 15 %215 = extractelement <8 x i32> %25, i32 1 %216 = icmp ne i32 %215, 0 %217 = select i1 %216, i32 %214, i32 %202 %218 = insertelement <4 x i32> undef, i32 %203, i32 0 %219 = insertelement <4 x i32> %218, i32 %204, i32 1 %220 = insertelement <4 x i32> %219, i32 %217, i32 2 %221 = insertelement <4 x i32> %220, i32 0, i32 3 %222 = bitcast <8 x i32> %23 to <32 x i8> %223 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %221, <32 x i8> %222, i32 14) %224 = extractelement <4 x i32> %223, i32 0 %225 = extractelement <4 x i32> %223, i32 1 %226 = extractelement <4 x i32> %223, i32 2 %227 = extractelement <4 x i32> %223, i32 3 %228 = bitcast i32 %224 to float %229 = bitcast i32 %225 to float %230 = bitcast i32 %226 to float %231 = bitcast i32 %227 to float %232 = fadd float %198, %228 %233 = fadd float %199, %229 %234 = fadd float %200, %230 %235 = fadd float %201, %231 %236 = bitcast float 0x36C8000000000000 to i32 %237 = bitcast float %30 to i32 %238 = bitcast float %31 to i32 %239 = insertelement <4 x i32> undef, i32 %237, i32 0 %240 = insertelement <4 x i32> %239, i32 %238, i32 1 %241 = insertelement <4 x i32> %240, i32 0, i32 2 %242 = insertelement <4 x i32> %241, i32 0, i32 3 %243 = bitcast <8 x i32> %25 to <32 x i8> %244 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %242, <32 x i8> %243, i32 2) %245 = extractelement <4 x i32> %244, i32 0 %246 = mul i32 %236, 4 %247 = lshr i32 %245, %246 %248 = and i32 %247, 15 %249 = extractelement <8 x i32> %25, i32 1 %250 = icmp ne i32 %249, 0 %251 = select i1 %250, i32 %248, i32 %236 %252 = insertelement <4 x i32> undef, i32 %237, i32 0 %253 = insertelement <4 x i32> %252, i32 %238, i32 1 %254 = insertelement <4 x i32> %253, i32 %251, i32 2 %255 = insertelement <4 x i32> %254, i32 0, i32 3 %256 = bitcast <8 x i32> %23 to <32 x i8> %257 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %255, <32 x i8> %256, i32 14) %258 = extractelement <4 x i32> %257, i32 0 %259 = extractelement <4 x i32> %257, i32 1 %260 = extractelement <4 x i32> %257, i32 2 %261 = extractelement <4 x i32> %257, i32 3 %262 = bitcast i32 %258 to float %263 = bitcast i32 %259 to float %264 = bitcast i32 %260 to float %265 = bitcast i32 %261 to float %266 = fadd float %232, %262 %267 = fadd float %233, %263 %268 = fadd float %234, %264 %269 = fadd float %235, %265 %270 = bitcast float 0x36CC000000000000 to i32 %271 = bitcast float %30 to i32 %272 = bitcast float %31 to i32 %273 = insertelement <4 x i32> undef, i32 %271, i32 0 %274 = insertelement <4 x i32> %273, i32 %272, i32 1 %275 = insertelement <4 x i32> %274, i32 0, i32 2 %276 = insertelement <4 x i32> %275, i32 0, i32 3 %277 = bitcast <8 x i32> %25 to <32 x i8> %278 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %276, <32 x i8> %277, i32 2) %279 = extractelement <4 x i32> %278, i32 0 %280 = mul i32 %270, 4 %281 = lshr i32 %279, %280 %282 = and i32 %281, 15 %283 = extractelement <8 x i32> %25, i32 1 %284 = icmp ne i32 %283, 0 %285 = select i1 %284, i32 %282, i32 %270 %286 = insertelement <4 x i32> undef, i32 %271, i32 0 %287 = insertelement <4 x i32> %286, i32 %272, i32 1 %288 = insertelement <4 x i32> %287, i32 %285, i32 2 %289 = insertelement <4 x i32> %288, i32 0, i32 3 %290 = bitcast <8 x i32> %23 to <32 x i8> %291 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %289, <32 x i8> %290, i32 14) %292 = extractelement <4 x i32> %291, i32 0 %293 = extractelement <4 x i32> %291, i32 1 %294 = extractelement <4 x i32> %291, i32 2 %295 = extractelement <4 x i32> %291, i32 3 %296 = bitcast i32 %292 to float %297 = bitcast i32 %293 to float %298 = bitcast i32 %294 to float %299 = bitcast i32 %295 to float %300 = fadd float %266, %296 %301 = fadd float %267, %297 %302 = fadd float %268, %298 %303 = fadd float %269, %299 %304 = fmul float %300, 1.250000e-01 %305 = fmul float %301, 1.250000e-01 %306 = fmul float %302, 1.250000e-01 %307 = fmul float %303, 1.250000e-01 %308 = call i32 @llvm.SI.packf16(float %304, float %305) %309 = bitcast i32 %308 to float %310 = call i32 @llvm.SI.packf16(float %306, float %307) %311 = bitcast i32 %310 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %309, float %311, float %309, float %311) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 v_cvt_u32_f32_e32 v1, v2 ; 7E020F02 v_cvt_u32_f32_e32 v2, v0 ; 7E040F00 v_mov_b32_e32 v3, 0 ; 7E060280 v_mov_b32_e32 v4, v3 ; 7E080303 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_mov_b32_e32 v5, v1 ; 7E0A0301 v_mov_b32_e32 v6, v2 ; 7E0C0302 v_mov_b32_e32 v7, v3 ; 7E0E0303 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v1 ; 7E120301 v_mov_b32_e32 v10, v2 ; 7E140302 v_mov_b32_e32 v11, v3 ; 7E160303 v_mov_b32_e32 v12, v4 ; 7E180304 v_mov_b32_e32 v13, v1 ; 7E1A0301 v_mov_b32_e32 v14, v2 ; 7E1C0302 v_mov_b32_e32 v15, v3 ; 7E1E0303 v_mov_b32_e32 v16, v4 ; 7E200304 v_mov_b32_e32 v17, v1 ; 7E220301 v_mov_b32_e32 v18, v2 ; 7E240302 v_mov_b32_e32 v19, v3 ; 7E260303 v_mov_b32_e32 v20, v4 ; 7E280304 s_load_dwordx8 s[16:23], s[6:7], 0x88 ; C0C80788 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[0:1], s17, 0 ; D10A0000 00010011 image_load_mip v0, 1, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[16:23] ; F0040100 00040001 s_waitcnt vmcnt(0) ; BF8C0770 v_and_b32_e32 v4, 15, v0 ; 3608008F v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; D2000004 00020880 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v8, v3 ; 7E100303 image_load v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[5:8], s[8:15] ; F0000F00 00020405 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, 0, v4 ; 06080880 v_add_f32_e32 v5, 0, v5 ; 060A0A80 v_add_f32_e32 v6, 0, v6 ; 060C0C80 v_add_f32_e32 v7, 0, v7 ; 060E0E80 v_lshrrev_b32_e32 v8, 4, v0 ; 2C100084 v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 1, v8, s[0:1] ; D2000008 00021081 v_mov_b32_e32 v11, v8 ; 7E160308 v_mov_b32_e32 v12, v3 ; 7E180303 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[8:15] ; F0000F00 00020809 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v8, v4 ; 06080908 v_add_f32_e32 v5, v9, v5 ; 060A0B09 v_add_f32_e32 v6, v10, v6 ; 060C0D0A v_add_f32_e32 v7, v11, v7 ; 060E0F0B v_lshrrev_b32_e32 v8, 8, v0 ; 2C100088 v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 2, v8, s[0:1] ; D2000008 00021082 v_mov_b32_e32 v15, v8 ; 7E1E0308 v_mov_b32_e32 v16, v3 ; 7E200303 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[8:15] ; F0000F00 0002080D s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v8, v4 ; 06080908 v_add_f32_e32 v5, v9, v5 ; 060A0B09 v_add_f32_e32 v6, v10, v6 ; 060C0D0A v_add_f32_e32 v7, v11, v7 ; 060E0F0B v_lshrrev_b32_e32 v8, 12, v0 ; 2C10008C v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 3, v8, s[0:1] ; D2000008 00021083 v_mov_b32_e32 v19, v8 ; 7E260308 v_mov_b32_e32 v20, v3 ; 7E280303 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[8:15] ; F0000F00 00020811 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v8, v4 ; 06080908 v_add_f32_e32 v5, v9, v5 ; 060A0B09 v_add_f32_e32 v6, v10, v6 ; 060C0D0A v_add_f32_e32 v7, v11, v7 ; 060E0F0B v_lshrrev_b32_e32 v8, 16, v0 ; 2C100090 v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 4, v8, s[0:1] ; D2000008 00021084 v_mov_b32_e32 v9, v1 ; 7E120301 v_mov_b32_e32 v10, v2 ; 7E140302 v_mov_b32_e32 v11, v3 ; 7E160303 v_mov_b32_e32 v12, v4 ; 7E180304 v_mov_b32_e32 v11, v8 ; 7E160308 v_lshrrev_b32_e32 v8, 20, v0 ; 2C100094 v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 5, v8, s[0:1] ; D2000008 00021085 v_mov_b32_e32 v13, v1 ; 7E1A0301 v_mov_b32_e32 v14, v2 ; 7E1C0302 v_mov_b32_e32 v15, v3 ; 7E1E0303 v_mov_b32_e32 v16, v4 ; 7E200304 v_mov_b32_e32 v15, v8 ; 7E1E0308 v_lshrrev_b32_e32 v8, 24, v0 ; 2C100098 v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 6, v8, s[0:1] ; D2000008 00021086 v_mov_b32_e32 v17, v1 ; 7E220301 v_mov_b32_e32 v18, v2 ; 7E240302 v_mov_b32_e32 v19, v3 ; 7E260303 v_mov_b32_e32 v20, v4 ; 7E280304 v_mov_b32_e32 v19, v8 ; 7E260308 v_lshrrev_b32_e32 v0, 28, v0 ; 2C00009C v_cndmask_b32_e64 v0, 7, v0, s[0:1] ; D2000000 00020087 v_mov_b32_e32 v21, v1 ; 7E2A0301 v_mov_b32_e32 v22, v2 ; 7E2C0302 v_mov_b32_e32 v23, v3 ; 7E2E0303 v_mov_b32_e32 v24, v4 ; 7E300304 v_mov_b32_e32 v23, v0 ; 7E2E0300 v_mov_b32_e32 v12, v3 ; 7E180303 v_mov_b32_e32 v16, v3 ; 7E200303 v_mov_b32_e32 v20, v3 ; 7E280303 v_mov_b32_e32 v24, v3 ; 7E300303 image_load v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[8:15] ; F0000F00 00020009 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[8:15] ; F0000F00 0002080D image_load v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[8:15] ; F0000F00 00020C11 image_load v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[8:15] ; F0000F00 00021015 s_waitcnt vmcnt(3) ; BF8C0773 v_add_f32_e32 v0, v0, v4 ; 06000900 v_add_f32_e32 v1, v1, v5 ; 06020B01 v_add_f32_e32 v2, v2, v6 ; 06040D02 v_add_f32_e32 v3, v3, v7 ; 06060F03 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v0, v8, v0 ; 06000108 v_add_f32_e32 v1, v9, v1 ; 06020309 v_add_f32_e32 v2, v10, v2 ; 0604050A v_add_f32_e32 v3, v11, v3 ; 0606070B s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v0, v12, v0 ; 0600010C v_add_f32_e32 v1, v13, v1 ; 0602030D v_add_f32_e32 v2, v14, v2 ; 0604050E v_add_f32_e32 v3, v15, v3 ; 0606070F s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v16, v0 ; 06000110 v_add_f32_e32 v1, v17, v1 ; 06020311 v_add_f32_e32 v2, v18, v2 ; 06040512 v_add_f32_e32 v3, v19, v3 ; 06060713 v_mul_f32_e32 v0, 0x3e000000, v0 ; 100000FF 3E000000 v_mul_f32_e32 v1, 0x3e000000, v1 ; 100202FF 3E000000 v_mul_f32_e32 v2, 0x3e000000, v2 ; 100404FF 3E000000 v_mul_f32_e32 v3, 0x3e000000, v3 ; 100606FF 3E000000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..2] IMM[0] FLT32 { 0.0000, 0.1250, 0.0000, 0.0000} IMM[1] UINT32 {0, 1, 2, 3} IMM[2] UINT32 {4, 5, 6, 7} 0: MOV TEMP[0], IMM[0].xxxx 1: F2U TEMP[1], IN[0] 2: MOV TEMP[1].w, IMM[1].xxxx 3: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 4: ADD TEMP[0], TEMP[0], TEMP[2] 5: MOV TEMP[1].w, IMM[1].yyyy 6: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 7: ADD TEMP[0], TEMP[0], TEMP[2] 8: MOV TEMP[1].w, IMM[1].zzzz 9: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 10: ADD TEMP[0], TEMP[0], TEMP[2] 11: MOV TEMP[1].w, IMM[1].wwww 12: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 13: ADD TEMP[0], TEMP[0], TEMP[2] 14: MOV TEMP[1].w, IMM[2].xxxx 15: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 16: ADD TEMP[0], TEMP[0], TEMP[2] 17: MOV TEMP[1].w, IMM[2].yyyy 18: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 19: ADD TEMP[0], TEMP[0], TEMP[2] 20: MOV TEMP[1].w, IMM[2].zzzz 21: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 22: ADD TEMP[0], TEMP[0], TEMP[2] 23: MOV TEMP[1].w, IMM[2].wwww 24: TXF TEMP[2], TEMP[1], SAMP[0], 2D_MSAA 25: ADD TEMP[0], TEMP[0], TEMP[2] 26: MUL TEMP[0], TEMP[0], IMM[0].yyyy 27: MOV OUT[0], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 17 %25 = load <8 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = fptoui float %26 to i32 %29 = fptoui float %27 to i32 %30 = bitcast i32 %28 to float %31 = bitcast i32 %29 to float %32 = bitcast float 0.000000e+00 to i32 %33 = bitcast float %30 to i32 %34 = bitcast float %31 to i32 %35 = insertelement <4 x i32> undef, i32 %33, i32 0 %36 = insertelement <4 x i32> %35, i32 %34, i32 1 %37 = insertelement <4 x i32> %36, i32 0, i32 2 %38 = insertelement <4 x i32> %37, i32 0, i32 3 %39 = bitcast <8 x i32> %25 to <32 x i8> %40 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %38, <32 x i8> %39, i32 2) %41 = extractelement <4 x i32> %40, i32 0 %42 = mul i32 %32, 4 %43 = lshr i32 %41, %42 %44 = and i32 %43, 15 %45 = extractelement <8 x i32> %25, i32 1 %46 = icmp ne i32 %45, 0 %47 = select i1 %46, i32 %44, i32 %32 %48 = insertelement <4 x i32> undef, i32 %33, i32 0 %49 = insertelement <4 x i32> %48, i32 %34, i32 1 %50 = insertelement <4 x i32> %49, i32 %47, i32 2 %51 = insertelement <4 x i32> %50, i32 0, i32 3 %52 = bitcast <8 x i32> %23 to <32 x i8> %53 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %51, <32 x i8> %52, i32 14) %54 = extractelement <4 x i32> %53, i32 0 %55 = extractelement <4 x i32> %53, i32 1 %56 = extractelement <4 x i32> %53, i32 2 %57 = extractelement <4 x i32> %53, i32 3 %58 = bitcast i32 %54 to float %59 = bitcast i32 %55 to float %60 = bitcast i32 %56 to float %61 = bitcast i32 %57 to float %62 = fadd float 0.000000e+00, %58 %63 = fadd float 0.000000e+00, %59 %64 = fadd float 0.000000e+00, %60 %65 = fadd float 0.000000e+00, %61 %66 = bitcast float 0x36A0000000000000 to i32 %67 = bitcast float %30 to i32 %68 = bitcast float %31 to i32 %69 = insertelement <4 x i32> undef, i32 %67, i32 0 %70 = insertelement <4 x i32> %69, i32 %68, i32 1 %71 = insertelement <4 x i32> %70, i32 0, i32 2 %72 = insertelement <4 x i32> %71, i32 0, i32 3 %73 = bitcast <8 x i32> %25 to <32 x i8> %74 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %72, <32 x i8> %73, i32 2) %75 = extractelement <4 x i32> %74, i32 0 %76 = mul i32 %66, 4 %77 = lshr i32 %75, %76 %78 = and i32 %77, 15 %79 = extractelement <8 x i32> %25, i32 1 %80 = icmp ne i32 %79, 0 %81 = select i1 %80, i32 %78, i32 %66 %82 = insertelement <4 x i32> undef, i32 %67, i32 0 %83 = insertelement <4 x i32> %82, i32 %68, i32 1 %84 = insertelement <4 x i32> %83, i32 %81, i32 2 %85 = insertelement <4 x i32> %84, i32 0, i32 3 %86 = bitcast <8 x i32> %23 to <32 x i8> %87 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %85, <32 x i8> %86, i32 14) %88 = extractelement <4 x i32> %87, i32 0 %89 = extractelement <4 x i32> %87, i32 1 %90 = extractelement <4 x i32> %87, i32 2 %91 = extractelement <4 x i32> %87, i32 3 %92 = bitcast i32 %88 to float %93 = bitcast i32 %89 to float %94 = bitcast i32 %90 to float %95 = bitcast i32 %91 to float %96 = fadd float %62, %92 %97 = fadd float %63, %93 %98 = fadd float %64, %94 %99 = fadd float %65, %95 %100 = bitcast float 0x36B0000000000000 to i32 %101 = bitcast float %30 to i32 %102 = bitcast float %31 to i32 %103 = insertelement <4 x i32> undef, i32 %101, i32 0 %104 = insertelement <4 x i32> %103, i32 %102, i32 1 %105 = insertelement <4 x i32> %104, i32 0, i32 2 %106 = insertelement <4 x i32> %105, i32 0, i32 3 %107 = bitcast <8 x i32> %25 to <32 x i8> %108 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %106, <32 x i8> %107, i32 2) %109 = extractelement <4 x i32> %108, i32 0 %110 = mul i32 %100, 4 %111 = lshr i32 %109, %110 %112 = and i32 %111, 15 %113 = extractelement <8 x i32> %25, i32 1 %114 = icmp ne i32 %113, 0 %115 = select i1 %114, i32 %112, i32 %100 %116 = insertelement <4 x i32> undef, i32 %101, i32 0 %117 = insertelement <4 x i32> %116, i32 %102, i32 1 %118 = insertelement <4 x i32> %117, i32 %115, i32 2 %119 = insertelement <4 x i32> %118, i32 0, i32 3 %120 = bitcast <8 x i32> %23 to <32 x i8> %121 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %119, <32 x i8> %120, i32 14) %122 = extractelement <4 x i32> %121, i32 0 %123 = extractelement <4 x i32> %121, i32 1 %124 = extractelement <4 x i32> %121, i32 2 %125 = extractelement <4 x i32> %121, i32 3 %126 = bitcast i32 %122 to float %127 = bitcast i32 %123 to float %128 = bitcast i32 %124 to float %129 = bitcast i32 %125 to float %130 = fadd float %96, %126 %131 = fadd float %97, %127 %132 = fadd float %98, %128 %133 = fadd float %99, %129 %134 = bitcast float 0x36B8000000000000 to i32 %135 = bitcast float %30 to i32 %136 = bitcast float %31 to i32 %137 = insertelement <4 x i32> undef, i32 %135, i32 0 %138 = insertelement <4 x i32> %137, i32 %136, i32 1 %139 = insertelement <4 x i32> %138, i32 0, i32 2 %140 = insertelement <4 x i32> %139, i32 0, i32 3 %141 = bitcast <8 x i32> %25 to <32 x i8> %142 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %140, <32 x i8> %141, i32 2) %143 = extractelement <4 x i32> %142, i32 0 %144 = mul i32 %134, 4 %145 = lshr i32 %143, %144 %146 = and i32 %145, 15 %147 = extractelement <8 x i32> %25, i32 1 %148 = icmp ne i32 %147, 0 %149 = select i1 %148, i32 %146, i32 %134 %150 = insertelement <4 x i32> undef, i32 %135, i32 0 %151 = insertelement <4 x i32> %150, i32 %136, i32 1 %152 = insertelement <4 x i32> %151, i32 %149, i32 2 %153 = insertelement <4 x i32> %152, i32 0, i32 3 %154 = bitcast <8 x i32> %23 to <32 x i8> %155 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %153, <32 x i8> %154, i32 14) %156 = extractelement <4 x i32> %155, i32 0 %157 = extractelement <4 x i32> %155, i32 1 %158 = extractelement <4 x i32> %155, i32 2 %159 = extractelement <4 x i32> %155, i32 3 %160 = bitcast i32 %156 to float %161 = bitcast i32 %157 to float %162 = bitcast i32 %158 to float %163 = bitcast i32 %159 to float %164 = fadd float %130, %160 %165 = fadd float %131, %161 %166 = fadd float %132, %162 %167 = fadd float %133, %163 %168 = bitcast float 0x36C0000000000000 to i32 %169 = bitcast float %30 to i32 %170 = bitcast float %31 to i32 %171 = insertelement <4 x i32> undef, i32 %169, i32 0 %172 = insertelement <4 x i32> %171, i32 %170, i32 1 %173 = insertelement <4 x i32> %172, i32 0, i32 2 %174 = insertelement <4 x i32> %173, i32 0, i32 3 %175 = bitcast <8 x i32> %25 to <32 x i8> %176 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %174, <32 x i8> %175, i32 2) %177 = extractelement <4 x i32> %176, i32 0 %178 = mul i32 %168, 4 %179 = lshr i32 %177, %178 %180 = and i32 %179, 15 %181 = extractelement <8 x i32> %25, i32 1 %182 = icmp ne i32 %181, 0 %183 = select i1 %182, i32 %180, i32 %168 %184 = insertelement <4 x i32> undef, i32 %169, i32 0 %185 = insertelement <4 x i32> %184, i32 %170, i32 1 %186 = insertelement <4 x i32> %185, i32 %183, i32 2 %187 = insertelement <4 x i32> %186, i32 0, i32 3 %188 = bitcast <8 x i32> %23 to <32 x i8> %189 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %187, <32 x i8> %188, i32 14) %190 = extractelement <4 x i32> %189, i32 0 %191 = extractelement <4 x i32> %189, i32 1 %192 = extractelement <4 x i32> %189, i32 2 %193 = extractelement <4 x i32> %189, i32 3 %194 = bitcast i32 %190 to float %195 = bitcast i32 %191 to float %196 = bitcast i32 %192 to float %197 = bitcast i32 %193 to float %198 = fadd float %164, %194 %199 = fadd float %165, %195 %200 = fadd float %166, %196 %201 = fadd float %167, %197 %202 = bitcast float 0x36C4000000000000 to i32 %203 = bitcast float %30 to i32 %204 = bitcast float %31 to i32 %205 = insertelement <4 x i32> undef, i32 %203, i32 0 %206 = insertelement <4 x i32> %205, i32 %204, i32 1 %207 = insertelement <4 x i32> %206, i32 0, i32 2 %208 = insertelement <4 x i32> %207, i32 0, i32 3 %209 = bitcast <8 x i32> %25 to <32 x i8> %210 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %208, <32 x i8> %209, i32 2) %211 = extractelement <4 x i32> %210, i32 0 %212 = mul i32 %202, 4 %213 = lshr i32 %211, %212 %214 = and i32 %213, 15 %215 = extractelement <8 x i32> %25, i32 1 %216 = icmp ne i32 %215, 0 %217 = select i1 %216, i32 %214, i32 %202 %218 = insertelement <4 x i32> undef, i32 %203, i32 0 %219 = insertelement <4 x i32> %218, i32 %204, i32 1 %220 = insertelement <4 x i32> %219, i32 %217, i32 2 %221 = insertelement <4 x i32> %220, i32 0, i32 3 %222 = bitcast <8 x i32> %23 to <32 x i8> %223 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %221, <32 x i8> %222, i32 14) %224 = extractelement <4 x i32> %223, i32 0 %225 = extractelement <4 x i32> %223, i32 1 %226 = extractelement <4 x i32> %223, i32 2 %227 = extractelement <4 x i32> %223, i32 3 %228 = bitcast i32 %224 to float %229 = bitcast i32 %225 to float %230 = bitcast i32 %226 to float %231 = bitcast i32 %227 to float %232 = fadd float %198, %228 %233 = fadd float %199, %229 %234 = fadd float %200, %230 %235 = fadd float %201, %231 %236 = bitcast float 0x36C8000000000000 to i32 %237 = bitcast float %30 to i32 %238 = bitcast float %31 to i32 %239 = insertelement <4 x i32> undef, i32 %237, i32 0 %240 = insertelement <4 x i32> %239, i32 %238, i32 1 %241 = insertelement <4 x i32> %240, i32 0, i32 2 %242 = insertelement <4 x i32> %241, i32 0, i32 3 %243 = bitcast <8 x i32> %25 to <32 x i8> %244 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %242, <32 x i8> %243, i32 2) %245 = extractelement <4 x i32> %244, i32 0 %246 = mul i32 %236, 4 %247 = lshr i32 %245, %246 %248 = and i32 %247, 15 %249 = extractelement <8 x i32> %25, i32 1 %250 = icmp ne i32 %249, 0 %251 = select i1 %250, i32 %248, i32 %236 %252 = insertelement <4 x i32> undef, i32 %237, i32 0 %253 = insertelement <4 x i32> %252, i32 %238, i32 1 %254 = insertelement <4 x i32> %253, i32 %251, i32 2 %255 = insertelement <4 x i32> %254, i32 0, i32 3 %256 = bitcast <8 x i32> %23 to <32 x i8> %257 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %255, <32 x i8> %256, i32 14) %258 = extractelement <4 x i32> %257, i32 0 %259 = extractelement <4 x i32> %257, i32 1 %260 = extractelement <4 x i32> %257, i32 2 %261 = extractelement <4 x i32> %257, i32 3 %262 = bitcast i32 %258 to float %263 = bitcast i32 %259 to float %264 = bitcast i32 %260 to float %265 = bitcast i32 %261 to float %266 = fadd float %232, %262 %267 = fadd float %233, %263 %268 = fadd float %234, %264 %269 = fadd float %235, %265 %270 = bitcast float 0x36CC000000000000 to i32 %271 = bitcast float %30 to i32 %272 = bitcast float %31 to i32 %273 = insertelement <4 x i32> undef, i32 %271, i32 0 %274 = insertelement <4 x i32> %273, i32 %272, i32 1 %275 = insertelement <4 x i32> %274, i32 0, i32 2 %276 = insertelement <4 x i32> %275, i32 0, i32 3 %277 = bitcast <8 x i32> %25 to <32 x i8> %278 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %276, <32 x i8> %277, i32 2) %279 = extractelement <4 x i32> %278, i32 0 %280 = mul i32 %270, 4 %281 = lshr i32 %279, %280 %282 = and i32 %281, 15 %283 = extractelement <8 x i32> %25, i32 1 %284 = icmp ne i32 %283, 0 %285 = select i1 %284, i32 %282, i32 %270 %286 = insertelement <4 x i32> undef, i32 %271, i32 0 %287 = insertelement <4 x i32> %286, i32 %272, i32 1 %288 = insertelement <4 x i32> %287, i32 %285, i32 2 %289 = insertelement <4 x i32> %288, i32 0, i32 3 %290 = bitcast <8 x i32> %23 to <32 x i8> %291 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %289, <32 x i8> %290, i32 14) %292 = extractelement <4 x i32> %291, i32 0 %293 = extractelement <4 x i32> %291, i32 1 %294 = extractelement <4 x i32> %291, i32 2 %295 = extractelement <4 x i32> %291, i32 3 %296 = bitcast i32 %292 to float %297 = bitcast i32 %293 to float %298 = bitcast i32 %294 to float %299 = bitcast i32 %295 to float %300 = fadd float %266, %296 %301 = fadd float %267, %297 %302 = fadd float %268, %298 %303 = fadd float %269, %299 %304 = fmul float %300, 1.250000e-01 %305 = fmul float %301, 1.250000e-01 %306 = fmul float %302, 1.250000e-01 %307 = fmul float %303, 1.250000e-01 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %304, float %305, float %306, float %307) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 v_cvt_u32_f32_e32 v1, v2 ; 7E020F02 v_cvt_u32_f32_e32 v2, v0 ; 7E040F00 v_mov_b32_e32 v3, 0 ; 7E060280 v_mov_b32_e32 v4, v3 ; 7E080303 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_mov_b32_e32 v5, v1 ; 7E0A0301 v_mov_b32_e32 v6, v2 ; 7E0C0302 v_mov_b32_e32 v7, v3 ; 7E0E0303 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v1 ; 7E120301 v_mov_b32_e32 v10, v2 ; 7E140302 v_mov_b32_e32 v11, v3 ; 7E160303 v_mov_b32_e32 v12, v4 ; 7E180304 v_mov_b32_e32 v13, v1 ; 7E1A0301 v_mov_b32_e32 v14, v2 ; 7E1C0302 v_mov_b32_e32 v15, v3 ; 7E1E0303 v_mov_b32_e32 v16, v4 ; 7E200304 v_mov_b32_e32 v17, v1 ; 7E220301 v_mov_b32_e32 v18, v2 ; 7E240302 v_mov_b32_e32 v19, v3 ; 7E260303 v_mov_b32_e32 v20, v4 ; 7E280304 s_load_dwordx8 s[16:23], s[6:7], 0x88 ; C0C80788 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[0:1], s17, 0 ; D10A0000 00010011 image_load_mip v0, 1, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[16:23] ; F0040100 00040001 s_waitcnt vmcnt(0) ; BF8C0770 v_and_b32_e32 v4, 15, v0 ; 3608008F v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; D2000004 00020880 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v8, v3 ; 7E100303 image_load v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[5:8], s[8:15] ; F0000F00 00020405 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, 0, v4 ; 06080880 v_add_f32_e32 v5, 0, v5 ; 060A0A80 v_add_f32_e32 v6, 0, v6 ; 060C0C80 v_add_f32_e32 v7, 0, v7 ; 060E0E80 v_lshrrev_b32_e32 v8, 4, v0 ; 2C100084 v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 1, v8, s[0:1] ; D2000008 00021081 v_mov_b32_e32 v11, v8 ; 7E160308 v_mov_b32_e32 v12, v3 ; 7E180303 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[8:15] ; F0000F00 00020809 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v8, v4 ; 06080908 v_add_f32_e32 v5, v9, v5 ; 060A0B09 v_add_f32_e32 v6, v10, v6 ; 060C0D0A v_add_f32_e32 v7, v11, v7 ; 060E0F0B v_lshrrev_b32_e32 v8, 8, v0 ; 2C100088 v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 2, v8, s[0:1] ; D2000008 00021082 v_mov_b32_e32 v15, v8 ; 7E1E0308 v_mov_b32_e32 v16, v3 ; 7E200303 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[8:15] ; F0000F00 0002080D s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v8, v4 ; 06080908 v_add_f32_e32 v5, v9, v5 ; 060A0B09 v_add_f32_e32 v6, v10, v6 ; 060C0D0A v_add_f32_e32 v7, v11, v7 ; 060E0F0B v_lshrrev_b32_e32 v8, 12, v0 ; 2C10008C v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 3, v8, s[0:1] ; D2000008 00021083 v_mov_b32_e32 v19, v8 ; 7E260308 v_mov_b32_e32 v20, v3 ; 7E280303 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[8:15] ; F0000F00 00020811 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v8, v4 ; 06080908 v_add_f32_e32 v5, v9, v5 ; 060A0B09 v_add_f32_e32 v6, v10, v6 ; 060C0D0A v_add_f32_e32 v7, v11, v7 ; 060E0F0B v_lshrrev_b32_e32 v8, 16, v0 ; 2C100090 v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 4, v8, s[0:1] ; D2000008 00021084 v_mov_b32_e32 v9, v1 ; 7E120301 v_mov_b32_e32 v10, v2 ; 7E140302 v_mov_b32_e32 v11, v3 ; 7E160303 v_mov_b32_e32 v12, v4 ; 7E180304 v_mov_b32_e32 v11, v8 ; 7E160308 v_lshrrev_b32_e32 v8, 20, v0 ; 2C100094 v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 5, v8, s[0:1] ; D2000008 00021085 v_mov_b32_e32 v13, v1 ; 7E1A0301 v_mov_b32_e32 v14, v2 ; 7E1C0302 v_mov_b32_e32 v15, v3 ; 7E1E0303 v_mov_b32_e32 v16, v4 ; 7E200304 v_mov_b32_e32 v15, v8 ; 7E1E0308 v_lshrrev_b32_e32 v8, 24, v0 ; 2C100098 v_and_b32_e32 v8, 15, v8 ; 3610108F v_cndmask_b32_e64 v8, 6, v8, s[0:1] ; D2000008 00021086 v_mov_b32_e32 v17, v1 ; 7E220301 v_mov_b32_e32 v18, v2 ; 7E240302 v_mov_b32_e32 v19, v3 ; 7E260303 v_mov_b32_e32 v20, v4 ; 7E280304 v_mov_b32_e32 v19, v8 ; 7E260308 v_lshrrev_b32_e32 v0, 28, v0 ; 2C00009C v_cndmask_b32_e64 v0, 7, v0, s[0:1] ; D2000000 00020087 v_mov_b32_e32 v21, v1 ; 7E2A0301 v_mov_b32_e32 v22, v2 ; 7E2C0302 v_mov_b32_e32 v23, v3 ; 7E2E0303 v_mov_b32_e32 v24, v4 ; 7E300304 v_mov_b32_e32 v23, v0 ; 7E2E0300 v_mov_b32_e32 v12, v3 ; 7E180303 v_mov_b32_e32 v16, v3 ; 7E200303 v_mov_b32_e32 v20, v3 ; 7E280303 v_mov_b32_e32 v24, v3 ; 7E300303 image_load v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[8:15] ; F0000F00 00020009 image_load v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[8:15] ; F0000F00 0002080D image_load v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[8:15] ; F0000F00 00020C11 image_load v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[8:15] ; F0000F00 00021015 s_waitcnt vmcnt(3) ; BF8C0773 v_add_f32_e32 v0, v0, v4 ; 06000900 v_add_f32_e32 v1, v1, v5 ; 06020B01 v_add_f32_e32 v2, v2, v6 ; 06040D02 v_add_f32_e32 v3, v3, v7 ; 06060F03 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v0, v8, v0 ; 06000108 v_add_f32_e32 v1, v9, v1 ; 06020309 v_add_f32_e32 v2, v10, v2 ; 0604050A v_add_f32_e32 v3, v11, v3 ; 0606070B s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v0, v12, v0 ; 0600010C v_add_f32_e32 v1, v13, v1 ; 0602030D v_add_f32_e32 v2, v14, v2 ; 0604050E v_add_f32_e32 v3, v15, v3 ; 0606070F s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v16, v0 ; 06000110 v_add_f32_e32 v1, v17, v1 ; 06020311 v_add_f32_e32 v2, v18, v2 ; 06040512 v_add_f32_e32 v3, v19, v3 ; 06060713 v_mul_f32_e32 v0, 0x3e000000, v0 ; 100000FF 3E000000 v_mul_f32_e32 v1, 0x3e000000, v1 ; 100202FF 3E000000 v_mul_f32_e32 v2, 0x3e000000, v2 ; 100404FF 3E000000 v_mul_f32_e32 v3, 0x3e000000, v3 ; 100606FF 3E000000 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 Probe color at (12,12) Expected: 0.000000 1.000000 0.000000 0.000000 Observed: 0.000000 0.000000 0.000000 1.000000 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %23, float %24, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL OUT[1], POSITION DCL SAMP[0] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: TEX OUT[1].z, IN[0], SAMP[0], 2D 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 2 call void @llvm.SI.export(i32 1, i32 0, i32 0, i32 8, i32 0, float %35, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v0, 0 ; 7E000280 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 4, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800400 00030102 s_waitcnt vmcnt(0) ; BF8C0770 exp 1, 8, 0, 0, 0, v1, v0, v0, v0 ; F8000081 00000001 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 0, 0, 1, 1, v0, v0, v0, v1 ; F800180F 01000000 s_endpgm ; BF810000 FRAG 0: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } Shader Disassembly: v_mov_b32_e32 v0, 0 ; 7E000280 exp 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 s_endpgm ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], COLOR 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: MOV OUT[2], IN[2] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 %27 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = extractelement <4 x float> %30, i32 1 %33 = extractelement <4 x float> %30, i32 2 %34 = extractelement <4 x float> %30, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %31, float %32, float %33, float %34) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(1) ; BF8C0771 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v9, v10, v11, v12 ; F800021F 0C0B0A09 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], POSITION DCL OUT[1], COLOR DCL SAMP[0] 0: TEX OUT[0].z, IN[1], SAMP[0], 2D 1: MOV OUT[1], IN[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 2 %40 = call i32 @llvm.SI.packf16(float %26, float %27) %41 = bitcast i32 %40 to float %42 = call i32 @llvm.SI.packf16(float %28, float %29) %43 = bitcast i32 %42 to float call void @llvm.SI.export(i32 1, i32 0, i32 0, i32 8, i32 0, float %39, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %41, float %43, float %41, float %43) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 4, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800400 00020006 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v2, v4, v5 ; 5E040B04 v_mov_b32_e32 v3, 0 ; 7E060280 s_waitcnt vmcnt(0) ; BF8C0770 exp 1, 8, 0, 0, 0, v0, v3, v3, v3 ; F8000081 03030300 exp 15, 0, 1, 1, 1, v1, v2, v1, v2 ; F8001C0F 02010201 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL SAMP[0] DCL OUT[0], POSITION DCL TEMP[0] 0: F2U TEMP[0], IN[0] 1: TXF OUT[0].z, TEMP[0], SAMP[0], 2D_MSAA 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 17 %25 = load <8 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %11) %29 = fptoui float %26 to i32 %30 = fptoui float %27 to i32 %31 = fptoui float %28 to i32 %32 = bitcast i32 %29 to float %33 = bitcast i32 %30 to float %34 = bitcast i32 %31 to float %35 = bitcast float %34 to i32 %36 = bitcast float %32 to i32 %37 = bitcast float %33 to i32 %38 = insertelement <4 x i32> undef, i32 %36, i32 0 %39 = insertelement <4 x i32> %38, i32 %37, i32 1 %40 = insertelement <4 x i32> %39, i32 0, i32 2 %41 = insertelement <4 x i32> %40, i32 0, i32 3 %42 = bitcast <8 x i32> %25 to <32 x i8> %43 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %41, <32 x i8> %42, i32 2) %44 = extractelement <4 x i32> %43, i32 0 %45 = mul i32 %35, 4 %46 = lshr i32 %44, %45 %47 = and i32 %46, 15 %48 = extractelement <8 x i32> %25, i32 1 %49 = icmp ne i32 %48, 0 %50 = select i1 %49, i32 %47, i32 %35 %51 = insertelement <4 x i32> undef, i32 %36, i32 0 %52 = insertelement <4 x i32> %51, i32 %37, i32 1 %53 = insertelement <4 x i32> %52, i32 %50, i32 2 %54 = insertelement <4 x i32> %53, i32 0, i32 3 %55 = bitcast <8 x i32> %23 to <32 x i8> %56 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %54, <32 x i8> %55, i32 14) %57 = extractelement <4 x i32> %56, i32 2 %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 1, i32 1, i32 1, i32 8, i32 0, float %58, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx8 s[12:19], s[6:7], 0x88 ; C0C60788 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_cvt_u32_f32_e32 v4, v2 ; 7E080F02 v_cvt_u32_f32_e32 v5, v3 ; 7E0A0F03 s_load_dwordx8 s[0:7], s[6:7], 0x0 ; C0C00700 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 v_mov_b32_e32 v6, 0 ; 7E0C0280 v_mov_b32_e32 v7, v6 ; 7E0E0306 s_waitcnt lgkmcnt(0) ; BF8C007F image_load_mip v1, 1, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[12:19] ; F0040100 00030104 v_cmp_ne_i32_e64 s[8:9], s13, 0 ; D10A0008 0001000D v_lshlrev_b32_e32 v2, 2, v0 ; 34040082 s_waitcnt vmcnt(0) ; BF8C0770 v_lshr_b32_e32 v1, v1, v2 ; 2A020501 v_and_b32_e32 v1, 15, v1 ; 3602028F v_cndmask_b32_e64 v0, v0, v1, s[8:9] ; D2000000 00220300 v_mov_b32_e32 v1, v4 ; 7E020304 v_mov_b32_e32 v2, v5 ; 7E040305 v_mov_b32_e32 v3, v6 ; 7E060306 v_mov_b32_e32 v4, v7 ; 7E080307 v_mov_b32_e32 v3, v0 ; 7E060300 v_mov_b32_e32 v4, v6 ; 7E080306 image_load v0, 4, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[0:7] ; F0000400 00000001 s_waitcnt vmcnt(0) ; BF8C0770 exp 1, 8, 0, 1, 1, v0, v6, v6, v6 ; F8001881 06060600 s_endpgm ; BF810000 FRAG DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], STENCIL DCL SAMP[0] DCL SAMP[1] 0: TEX OUT[0].y, IN[1], SAMP[1], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 1 call void @llvm.SI.export(i32 2, i32 1, i32 1, i32 8, i32 0, float 0.000000e+00, float %35, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 2, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800200 00030002 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt vmcnt(0) ; BF8C0770 exp 2, 8, 0, 1, 1, v1, v0, v1, v1 ; F8001882 01010001 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL SAMP[0..1] DCL OUT[0], POSITION DCL OUT[1], STENCIL DCL TEMP[0] 0: F2U TEMP[0], IN[0] 1: TXF OUT[0].z, TEMP[0], SAMP[0], 2D_MSAA 2: TXF OUT[1].y, TEMP[0], SAMP[1], 2D_MSAA 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 17 %25 = load <8 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 18 %29 = load <8 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %11) %33 = fptoui float %30 to i32 %34 = fptoui float %31 to i32 %35 = fptoui float %32 to i32 %36 = bitcast i32 %33 to float %37 = bitcast i32 %34 to float %38 = bitcast i32 %35 to float %39 = bitcast float %38 to i32 %40 = bitcast float %36 to i32 %41 = bitcast float %37 to i32 %42 = insertelement <4 x i32> undef, i32 %40, i32 0 %43 = insertelement <4 x i32> %42, i32 %41, i32 1 %44 = insertelement <4 x i32> %43, i32 0, i32 2 %45 = insertelement <4 x i32> %44, i32 0, i32 3 %46 = bitcast <8 x i32> %25 to <32 x i8> %47 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %45, <32 x i8> %46, i32 2) %48 = extractelement <4 x i32> %47, i32 0 %49 = mul i32 %39, 4 %50 = lshr i32 %48, %49 %51 = and i32 %50, 15 %52 = extractelement <8 x i32> %25, i32 1 %53 = icmp ne i32 %52, 0 %54 = select i1 %53, i32 %51, i32 %39 %55 = insertelement <4 x i32> undef, i32 %40, i32 0 %56 = insertelement <4 x i32> %55, i32 %41, i32 1 %57 = insertelement <4 x i32> %56, i32 %54, i32 2 %58 = insertelement <4 x i32> %57, i32 0, i32 3 %59 = bitcast <8 x i32> %23 to <32 x i8> %60 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %58, <32 x i8> %59, i32 14) %61 = extractelement <4 x i32> %60, i32 2 %62 = bitcast i32 %61 to float %63 = bitcast float %38 to i32 %64 = bitcast float %36 to i32 %65 = bitcast float %37 to i32 %66 = insertelement <4 x i32> undef, i32 %64, i32 0 %67 = insertelement <4 x i32> %66, i32 %65, i32 1 %68 = insertelement <4 x i32> %67, i32 0, i32 2 %69 = insertelement <4 x i32> %68, i32 0, i32 3 %70 = bitcast <8 x i32> %29 to <32 x i8> %71 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %69, <32 x i8> %70, i32 2) %72 = extractelement <4 x i32> %71, i32 0 %73 = mul i32 %63, 4 %74 = lshr i32 %72, %73 %75 = and i32 %74, 15 %76 = extractelement <8 x i32> %29, i32 1 %77 = icmp ne i32 %76, 0 %78 = select i1 %77, i32 %75, i32 %63 %79 = insertelement <4 x i32> undef, i32 %64, i32 0 %80 = insertelement <4 x i32> %79, i32 %65, i32 1 %81 = insertelement <4 x i32> %80, i32 %78, i32 2 %82 = insertelement <4 x i32> %81, i32 0, i32 3 %83 = bitcast <8 x i32> %27 to <32 x i8> %84 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %82, <32 x i8> %83, i32 14) %85 = extractelement <4 x i32> %84, i32 1 %86 = bitcast i32 %85 to float call void @llvm.SI.export(i32 3, i32 1, i32 1, i32 8, i32 0, float %62, float %86, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_cvt_u32_f32_e32 v4, v2 ; 7E080F02 s_load_dwordx8 s[8:15], s[6:7], 0x88 ; C0C40788 v_cvt_u32_f32_e32 v5, v3 ; 7E0A0F03 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 v_mov_b32_e32 v6, 0 ; 7E0C0280 v_mov_b32_e32 v7, v6 ; 7E0E0306 s_load_dwordx8 s[16:23], s[6:7], 0x90 ; C0C80790 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[0:1], s9, 0 ; D10A0000 00010009 image_load_mip v1, 1, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[8:15] ; F0040100 00020104 v_lshlrev_b32_e32 v2, 2, v0 ; 34040082 s_waitcnt vmcnt(0) ; BF8C0770 v_lshr_b32_e32 v1, v1, v2 ; 2A020501 s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708 v_and_b32_e32 v1, 15, v1 ; 3602028F v_cndmask_b32_e64 v1, v0, v1, s[0:1] ; D2000001 00020300 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v5 ; 7E120305 v_mov_b32_e32 v10, v6 ; 7E140306 v_mov_b32_e32 v11, v7 ; 7E160307 v_mov_b32_e32 v10, v1 ; 7E140301 v_mov_b32_e32 v11, v6 ; 7E160306 image_load v1, 4, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[24:31] ; F0000400 00060108 image_load_mip v3, 1, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[16:23] ; F0040100 00040304 v_cmp_ne_i32_e64 s[0:1], s17, 0 ; D10A0000 00010011 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_lshr_b32_e32 v2, v3, v2 ; 2A040503 v_and_b32_e32 v2, 15, v2 ; 3604048F v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; D2000000 00020500 v_mov_b32_e32 v2, v4 ; 7E040304 v_mov_b32_e32 v3, v5 ; 7E060305 v_mov_b32_e32 v4, v6 ; 7E080306 v_mov_b32_e32 v5, v7 ; 7E0A0307 v_mov_b32_e32 v4, v0 ; 7E080300 v_mov_b32_e32 v5, v6 ; 7E0A0306 image_load v0, 2, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[4:11] ; F0000200 00010002 s_waitcnt vmcnt(0) ; BF8C0770 exp 3, 8, 0, 1, 1, v1, v0, v6, v6 ; F8001883 06060001 s_endpgm ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL OUT[1], POSITION DCL OUT[2], STENCIL DCL SAMP[0] DCL SAMP[1] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: TEX OUT[1].z, IN[0], SAMP[0], 2D 2: TEX OUT[2].y, IN[0], SAMP[1], 2D 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 2 %40 = bitcast float %30 to i32 %41 = bitcast float %31 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = bitcast <8 x i32> %27 to <32 x i8> %45 = bitcast <4 x i32> %29 to <16 x i8> %46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %44, <16 x i8> %45, i32 2) %47 = extractelement <4 x float> %46, i32 1 call void @llvm.SI.export(i32 3, i32 0, i32 0, i32 8, i32 0, float %39, float %47, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0 ; 7E040280 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_load_dwordx4 s[4:7], s[4:5], 0x4 ; C0820504 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 4, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[8:15], s[0:3] ; F0800400 00020004 image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[4:7] ; F0800200 00240104 s_waitcnt vmcnt(0) ; BF8C0770 exp 3, 8, 0, 0, 0, v0, v1, v2, v2 ; F8000083 02020100 exp 15, 0, 0, 1, 1, v2, v2, v2, v3 ; F800180F 03020202 s_endpgm ; BF810000 PIGLIT: {"result": "fail" }