Command: /home/amarildo/.local/share/Steam/steamapps/common/X-Plane 10/X-Plane-x86_64 --force_run Driver vendor: X.Org Device vendor: AMD Device name: AMD PITCAIRN (DRM 3.9.0 / 4.7.0-1-ARCH-g841c4eb-dirty, LLVM 4.0.0) Draw call sequence # = 67 HW reached sequence # = 66 Elapsed time = 2000 ms blit: dst.resource: {target = 2d, format = PIPE_FORMAT_R16G16B16A16_FLOAT, width0 = 256, height0 = 128, depth0 = 1, array_size = 1, last_level = 8, nr_samples = 0, usage = 0, bind = 10, flags = 4, } dst.level: 0 dst.box: {x = 0, y = 0, z = 0, width = 256, height = 128, depth = 1, } dst.format: PIPE_FORMAT_R16G16B16A16_FLOAT src.resource: {target = 2d, format = PIPE_FORMAT_R32G32B32A32_FLOAT, width0 = 256, height0 = 128, depth0 = 1, array_size = 1, last_level = 0, nr_samples = 0, usage = 4, bind = 8, flags = 0, } src.level: 0 src.box: {x = 0, y = 0, z = 0, width = 256, height = 128, depth = 1, } src.format: PIPE_FORMAT_R32G32B32A32_FLOAT mask: 0xf filter: 0 scissor_enable: 0 scissor: {minx = 0, miny = 0, maxx = 0, maxy = 0, } render_condition_enable: 0 ***************************************************************************** Driver-specific state: SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 Vertex Shader as VS - main shader part - LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) { main_body: %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !invariant.load !0 %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %14) %20 = extractelement <4 x float> %19, i32 0 %21 = extractelement <4 x float> %19, i32 1 %22 = extractelement <4 x float> %19, i32 2 %23 = extractelement <4 x float> %19, i32 3 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !invariant.load !0 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %15) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = extractelement <4 x float> %26, i32 2 %30 = extractelement <4 x float> %26, i32 3 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !invariant.load !0 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %16) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !invariant.load !0 %38 = call float @llvm.SI.load.const(<16 x i8> %37, i32 0) %39 = fmul float %38, %20 %40 = call float @llvm.SI.load.const(<16 x i8> %37, i32 4) %41 = fmul float %40, %20 %42 = call float @llvm.SI.load.const(<16 x i8> %37, i32 8) %43 = fmul float %42, %20 %44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !invariant.load !0 %46 = call float @llvm.SI.load.const(<16 x i8> %45, i32 12) %47 = fmul float %46, %20 %48 = call float @llvm.SI.load.const(<16 x i8> %45, i32 16) %49 = fmul float %48, %21 %50 = fadd float %49, %39 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !invariant.load !0 %53 = call float @llvm.SI.load.const(<16 x i8> %52, i32 20) %54 = fmul float %53, %21 %55 = fadd float %54, %41 %56 = call float @llvm.SI.load.const(<16 x i8> %52, i32 24) %57 = fmul float %56, %21 %58 = fadd float %57, %43 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !invariant.load !0 %61 = call float @llvm.SI.load.const(<16 x i8> %60, i32 28) %62 = fmul float %61, %21 %63 = fadd float %62, %47 %64 = call float @llvm.SI.load.const(<16 x i8> %60, i32 32) %65 = fmul float %64, %22 %66 = fadd float %65, %50 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !invariant.load !0 %69 = call float @llvm.SI.load.const(<16 x i8> %68, i32 36) %70 = fmul float %69, %22 %71 = fadd float %70, %55 %72 = call float @llvm.SI.load.const(<16 x i8> %68, i32 40) %73 = fmul float %72, %22 %74 = fadd float %73, %58 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !invariant.load !0 %77 = call float @llvm.SI.load.const(<16 x i8> %76, i32 44) %78 = fmul float %77, %22 %79 = fadd float %78, %63 %80 = call float @llvm.SI.load.const(<16 x i8> %76, i32 48) %81 = fmul float %80, %23 %82 = fadd float %81, %66 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !invariant.load !0 %85 = call float @llvm.SI.load.const(<16 x i8> %84, i32 52) %86 = fmul float %85, %23 %87 = fadd float %86, %71 %88 = call float @llvm.SI.load.const(<16 x i8> %84, i32 56) %89 = fmul float %88, %23 %90 = fadd float %89, %74 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call float @llvm.SI.load.const(<16 x i8> %92, i32 60) %94 = fmul float %93, %23 %95 = fadd float %94, %79 %96 = call float @llvm.SI.load.const(<16 x i8> %92, i32 64) %97 = fmul float %96, %20 %98 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !invariant.load !0 %100 = call float @llvm.SI.load.const(<16 x i8> %99, i32 68) %101 = fmul float %100, %20 %102 = call float @llvm.SI.load.const(<16 x i8> %99, i32 72) %103 = fmul float %102, %20 %104 = call float @llvm.SI.load.const(<16 x i8> %99, i32 76) %105 = fmul float %104, %20 %106 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !invariant.load !0 %108 = call float @llvm.SI.load.const(<16 x i8> %107, i32 80) %109 = fmul float %108, %21 %110 = fadd float %109, %97 %111 = call float @llvm.SI.load.const(<16 x i8> %107, i32 84) %112 = fmul float %111, %21 %113 = fadd float %112, %101 %114 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %115 = load <16 x i8>, <16 x i8> addrspace(2)* %114, align 16, !invariant.load !0 %116 = call float @llvm.SI.load.const(<16 x i8> %115, i32 88) %117 = fmul float %116, %21 %118 = fadd float %117, %103 %119 = call float @llvm.SI.load.const(<16 x i8> %115, i32 92) %120 = fmul float %119, %21 %121 = fadd float %120, %105 %122 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %123 = load <16 x i8>, <16 x i8> addrspace(2)* %122, align 16, !invariant.load !0 %124 = call float @llvm.SI.load.const(<16 x i8> %123, i32 96) %125 = fmul float %124, %22 %126 = fadd float %125, %110 %127 = call float @llvm.SI.load.const(<16 x i8> %123, i32 100) %128 = fmul float %127, %22 %129 = fadd float %128, %113 %130 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %131 = load <16 x i8>, <16 x i8> addrspace(2)* %130, align 16, !invariant.load !0 %132 = call float @llvm.SI.load.const(<16 x i8> %131, i32 104) %133 = fmul float %132, %22 %134 = fadd float %133, %118 %135 = call float @llvm.SI.load.const(<16 x i8> %131, i32 108) %136 = fmul float %135, %22 %137 = fadd float %136, %121 %138 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %139 = load <16 x i8>, <16 x i8> addrspace(2)* %138, align 16, !invariant.load !0 %140 = call float @llvm.SI.load.const(<16 x i8> %139, i32 112) %141 = fmul float %140, %23 %142 = fadd float %141, %126 %143 = call float @llvm.SI.load.const(<16 x i8> %139, i32 116) %144 = fmul float %143, %23 %145 = fadd float %144, %129 %146 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %147 = load <16 x i8>, <16 x i8> addrspace(2)* %146, align 16, !invariant.load !0 %148 = call float @llvm.SI.load.const(<16 x i8> %147, i32 120) %149 = fmul float %148, %23 %150 = fadd float %149, %134 %151 = call float @llvm.SI.load.const(<16 x i8> %147, i32 124) %152 = fmul float %151, %23 %153 = fadd float %152, %137 %154 = bitcast i32 %12 to float %155 = insertvalue <{ float, float, float }> undef, float %154, 2 %156 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i64 14, !amdgpu.uniform !0 %157 = load <16 x i8>, <16 x i8> addrspace(2)* %156, align 16, !invariant.load !0 %158 = call float @llvm.SI.load.const(<16 x i8> %157, i32 0) %159 = fmul float %158, %142 %160 = call float @llvm.SI.load.const(<16 x i8> %157, i32 4) %161 = fmul float %160, %145 %162 = fadd float %159, %161 %163 = call float @llvm.SI.load.const(<16 x i8> %157, i32 8) %164 = fmul float %163, %150 %165 = fadd float %162, %164 %166 = call float @llvm.SI.load.const(<16 x i8> %157, i32 12) %167 = fmul float %166, %153 %168 = fadd float %165, %167 %169 = call float @llvm.SI.load.const(<16 x i8> %157, i32 16) %170 = fmul float %169, %142 %171 = call float @llvm.SI.load.const(<16 x i8> %157, i32 20) %172 = fmul float %171, %145 %173 = fadd float %170, %172 %174 = call float @llvm.SI.load.const(<16 x i8> %157, i32 24) %175 = fmul float %174, %150 %176 = fadd float %173, %175 %177 = call float @llvm.SI.load.const(<16 x i8> %157, i32 28) %178 = fmul float %177, %153 %179 = fadd float %176, %178 %180 = call float @llvm.SI.load.const(<16 x i8> %157, i32 32) %181 = fmul float %180, %142 %182 = call float @llvm.SI.load.const(<16 x i8> %157, i32 36) %183 = fmul float %182, %145 %184 = fadd float %181, %183 %185 = call float @llvm.SI.load.const(<16 x i8> %157, i32 40) %186 = fmul float %185, %150 %187 = fadd float %184, %186 %188 = call float @llvm.SI.load.const(<16 x i8> %157, i32 44) %189 = fmul float %188, %153 %190 = fadd float %187, %189 %191 = call float @llvm.SI.load.const(<16 x i8> %157, i32 48) %192 = fmul float %191, %142 %193 = call float @llvm.SI.load.const(<16 x i8> %157, i32 52) %194 = fmul float %193, %145 %195 = fadd float %192, %194 %196 = call float @llvm.SI.load.const(<16 x i8> %157, i32 56) %197 = fmul float %196, %150 %198 = fadd float %195, %197 %199 = call float @llvm.SI.load.const(<16 x i8> %157, i32 60) %200 = fmul float %199, %153 %201 = fadd float %198, %200 %202 = call float @llvm.SI.load.const(<16 x i8> %157, i32 64) %203 = fmul float %202, %142 %204 = call float @llvm.SI.load.const(<16 x i8> %157, i32 68) %205 = fmul float %204, %145 %206 = fadd float %203, %205 %207 = call float @llvm.SI.load.const(<16 x i8> %157, i32 72) %208 = fmul float %207, %150 %209 = fadd float %206, %208 %210 = call float @llvm.SI.load.const(<16 x i8> %157, i32 76) %211 = fmul float %210, %153 %212 = fadd float %209, %211 %213 = call float @llvm.SI.load.const(<16 x i8> %157, i32 80) %214 = fmul float %213, %142 %215 = call float @llvm.SI.load.const(<16 x i8> %157, i32 84) %216 = fmul float %215, %145 %217 = fadd float %214, %216 %218 = call float @llvm.SI.load.const(<16 x i8> %157, i32 88) %219 = fmul float %218, %150 %220 = fadd float %217, %219 %221 = call float @llvm.SI.load.const(<16 x i8> %157, i32 92) %222 = fmul float %221, %153 %223 = fadd float %220, %222 %224 = call float @llvm.SI.load.const(<16 x i8> %157, i32 96) %225 = fmul float %224, %142 %226 = call float @llvm.SI.load.const(<16 x i8> %157, i32 100) %227 = fmul float %226, %145 %228 = fadd float %225, %227 %229 = call float @llvm.SI.load.const(<16 x i8> %157, i32 104) %230 = fmul float %229, %150 %231 = fadd float %228, %230 %232 = call float @llvm.SI.load.const(<16 x i8> %157, i32 108) %233 = fmul float %232, %153 %234 = fadd float %231, %233 %235 = call float @llvm.SI.load.const(<16 x i8> %157, i32 112) %236 = fmul float %235, %142 %237 = call float @llvm.SI.load.const(<16 x i8> %157, i32 116) %238 = fmul float %237, %145 %239 = fadd float %236, %238 %240 = call float @llvm.SI.load.const(<16 x i8> %157, i32 120) %241 = fmul float %240, %150 %242 = fadd float %239, %241 %243 = call float @llvm.SI.load.const(<16 x i8> %157, i32 124) %244 = fmul float %243, %153 %245 = fadd float %242, %244 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float %28, float %29, float %30) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %34, float %35, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %82, float %87, float %90, float %95) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 13, i32 0, float %168, float %179, float %190, float %201) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 14, i32 0, float %212, float %223, float %234, float %245) ret <{ float, float, float }> %155 } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 Shader main disassembly: s_load_dwordx4 s[12:15], s[10:11], 0x0 ; C0860B00 s_load_dwordx4 s[16:19], s[10:11], 0x4 ; C0880B04 s_load_dwordx4 s[4:7], s[10:11], 0x8 ; C0820B08 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v4, s[12:15], 0 idxen ; E00C2000 80030704 buffer_load_format_xyzw v[11:14], v5, s[16:19], 0 idxen ; E00C2000 80040B05 buffer_load_format_xyzw v[3:6], v6, s[4:7], 0 idxen ; E00C2000 80010306 s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900 s_buffer_load_dword s3, s[8:11], 0x1 ; C2018901 s_buffer_load_dword s27, s[8:11], 0x11 ; C20D8911 s_buffer_load_dword s26, s[8:11], 0x10 ; C20D0910 s_buffer_load_dword s31, s[8:11], 0x15 ; C20F8915 s_buffer_load_dword s28, s[8:11], 0x12 ; C20E0912 s_buffer_load_dword s30, s[8:11], 0x14 ; C20F0914 s_buffer_load_dword s35, s[8:11], 0x19 ; C2118919 s_buffer_load_dword s12, s[8:11], 0x2 ; C2060902 s_buffer_load_dword s13, s[8:11], 0x3 ; C2068903 s_buffer_load_dword s29, s[8:11], 0x13 ; C20E8913 s_buffer_load_dword s32, s[8:11], 0x16 ; C2100916 s_buffer_load_dword s34, s[8:11], 0x18 ; C2110918 s_buffer_load_dword s39, s[8:11], 0x1d ; C213891D s_buffer_load_dword s33, s[8:11], 0x17 ; C2108917 s_buffer_load_dword s36, s[8:11], 0x1a ; C212091A s_buffer_load_dword s38, s[8:11], 0x1c ; C213091C s_buffer_load_dword s37, s[8:11], 0x1b ; C212891B s_buffer_load_dword s40, s[8:11], 0x1e ; C214091E s_buffer_load_dword s14, s[8:11], 0x4 ; C2070904 s_buffer_load_dword s15, s[8:11], 0x5 ; C2078905 s_buffer_load_dword s16, s[8:11], 0x6 ; C2080906 s_buffer_load_dword s17, s[8:11], 0x7 ; C2088907 s_buffer_load_dword s18, s[8:11], 0x8 ; C2090908 s_buffer_load_dword s19, s[8:11], 0x9 ; C2098909 s_buffer_load_dword s20, s[8:11], 0xa ; C20A090A s_buffer_load_dword s21, s[8:11], 0xb ; C20A890B s_buffer_load_dword s22, s[8:11], 0xc ; C20B090C s_buffer_load_dword s23, s[8:11], 0xd ; C20B890D s_buffer_load_dword s24, s[8:11], 0xe ; C20C090E s_buffer_load_dword s25, s[8:11], 0xf ; C20C890F s_buffer_load_dword s8, s[8:11], 0x1f ; C204091F s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v0, s2, v7 ; 10000E02 v_mul_f32_e32 v1, s3, v7 ; 10020E03 s_load_dwordx4 s[0:3], s[0:1], 0x38 ; C0800138 v_mul_f32_e32 v18, s27, v7 ; 10240E1B v_mul_f32_e32 v17, s26, v7 ; 10220E1A v_mac_f32_e32 v18, s31, v8 ; 3E24101F v_mul_f32_e32 v19, s28, v7 ; 10260E1C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 v_mac_f32_e32 v17, s30, v8 ; 3E22101E v_mac_f32_e32 v18, s35, v9 ; 3E241223 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 v_mul_f32_e32 v15, s12, v7 ; 101E0E0C v_mul_f32_e32 v16, s13, v7 ; 10200E0D v_mul_f32_e32 v7, s29, v7 ; 100E0E1D v_mac_f32_e32 v19, s32, v8 ; 3E261020 v_mac_f32_e32 v17, s34, v9 ; 3E221222 v_mac_f32_e32 v18, s39, v10 ; 3E241427 v_mac_f32_e32 v7, s33, v8 ; 3E0E1021 v_mac_f32_e32 v19, s36, v9 ; 3E261224 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v5, s5, v18 ; 100A2405 v_mac_f32_e32 v17, s38, v10 ; 3E221426 v_mac_f32_e32 v7, s37, v9 ; 3E0E1225 v_mac_f32_e32 v5, s4, v17 ; 3E0A2204 v_mac_f32_e32 v19, s40, v10 ; 3E261428 v_mac_f32_e32 v7, s8, v10 ; 3E0E1408 v_mac_f32_e32 v5, s6, v19 ; 3E0A2606 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 v_mac_f32_e32 v5, s7, v7 ; 3E0A0E07 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s5, s[0:3], 0xb ; C202810B v_mac_f32_e32 v0, s14, v8 ; 3E00100E v_mac_f32_e32 v1, s15, v8 ; 3E02100F v_mac_f32_e32 v15, s16, v8 ; 3E1E1010 v_mac_f32_e32 v16, s17, v8 ; 3E201011 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s13, v18 ; 1010240D v_mac_f32_e32 v0, s18, v9 ; 3E001212 v_mac_f32_e32 v1, s19, v9 ; 3E021213 v_mac_f32_e32 v15, s20, v9 ; 3E1E1214 v_mac_f32_e32 v16, s21, v9 ; 3E201215 v_mul_f32_e32 v9, s7, v18 ; 10122407 v_mac_f32_e32 v8, s12, v17 ; 3E10220C v_mul_f32_e32 v6, s9, v18 ; 100C2409 v_mac_f32_e32 v8, s4, v19 ; 3E102604 v_mac_f32_e32 v9, s6, v17 ; 3E122206 s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111 v_mac_f32_e32 v6, s8, v17 ; 3E0C2208 s_buffer_load_dword s8, s[0:3], 0xe ; C204010E v_mac_f32_e32 v8, s5, v7 ; 3E100E05 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F v_mac_f32_e32 v0, s22, v10 ; 3E001416 v_mac_f32_e32 v1, s23, v10 ; 3E021417 v_mac_f32_e32 v15, s24, v10 ; 3E1E1418 v_mac_f32_e32 v16, s25, v10 ; 3E201419 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s6, v18 ; 10142406 v_mac_f32_e32 v9, s8, v19 ; 3E122608 v_mac_f32_e32 v10, s5, v17 ; 3E142205 s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 v_mac_f32_e32 v9, s4, v7 ; 3E120E04 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 exp 15, 32, 0, 0, 0, v11, v12, v13, v14 ; F800020F 0E0D0C0B s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v11, s5, v18 ; 10162405 v_mac_f32_e32 v10, s7, v19 ; 3E142607 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 v_mac_f32_e32 v11, s4, v17 ; 3E162204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 v_mac_f32_e32 v11, s6, v19 ; 3E162606 s_buffer_load_dword s9, s[0:3], 0x1d ; C204811D s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v10, s8, v7 ; 3E140E08 v_mac_f32_e32 v11, s7, v7 ; 3E160E07 s_buffer_load_dword s8, s[0:3], 0x18 ; C2040118 s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C v_mul_f32_e32 v12, s4, v18 ; 10182404 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s5, s[0:3], 0x1a ; C202811A s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_buffer_load_dword s6, s[0:3], 0x1b ; C203011B s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F v_mul_f32_e32 v13, s9, v18 ; 101A2409 v_mac_f32_e32 v6, s10, v19 ; 3E0C260A s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v12, s8, v17 ; 3E182208 v_mac_f32_e32 v13, s7, v17 ; 3E1A2207 exp 15, 33, 0, 0, 0, v3, v4, v0, v0 ; F800021F 00000403 v_mac_f32_e32 v12, s5, v19 ; 3E182605 v_mac_f32_e32 v13, s4, v19 ; 3E1A2604 v_mac_f32_e32 v6, s11, v7 ; 3E0C0E0B exp 15, 12, 0, 0, 0, v0, v1, v15, v16 ; F80000CF 100F0100 v_mac_f32_e32 v12, s6, v7 ; 3E180E06 v_mac_f32_e32 v13, s0, v7 ; 3E1A0E00 exp 15, 13, 0, 0, 0, v5, v6, v8, v9 ; F80000DF 09080605 exp 15, 14, 0, 1, 0, v10, v11, v12, v13 ; F80008EF 0D0C0B0A s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 652 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY prolog.color_two_side = 0 prolog.flatshade_colors = 0 prolog.poly_stipple = 0 prolog.force_persp_sample_interp = 0 prolog.force_linear_sample_interp = 0 prolog.force_persp_center_interp = 0 prolog.force_linear_center_interp = 0 prolog.bc_optimize_for_persp = 0 prolog.bc_optimize_for_linear = 0 epilog.spi_shader_col_format = 0x4 epilog.color_is_int8 = 0x0 epilog.last_cbuf = 0 epilog.alpha_func = 7 epilog.alpha_to_one = 0 epilog.poly_line_smoothing = 0 epilog.clamp_color = 0 Pixel Shader - main shader part - LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !invariant.load !0 %27 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %28 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %27, i64 0, i64 3, !amdgpu.uniform !0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !invariant.load !0 %30 = extractelement <8 x i32> %26, i32 7 %31 = extractelement <4 x i32> %29, i32 0 %32 = and i32 %31, %30 %33 = insertelement <4 x i32> %29, i32 %32, i32 0 %34 = bitcast float %23 to i32 %35 = bitcast float %24 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %26, <4 x i32> %33, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %44 = fmul float %43, %39 %45 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %46 = fmul float %45, %40 %47 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %48 = fmul float %47, %41 %49 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %50 = fmul float %49, %42 %51 = bitcast float %5 to i32 %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %51, 10 %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52, float %44, 11 %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %46, 12 %55 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54, float %48, 13 %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %55, float %50, 14 %57 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %57 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } !0 = !{} Pixel Shader: Shader main disassembly: s_mov_b64 s[6:7], exec ; BE86047E s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C s_mov_b32 m0, s11 ; BEFC030B v_interp_p1_f32 v0, v2, 0, 1, [m0] ; C8000402 v_interp_p2_f32 v0, [v0], v3, 0, 1, [m0] ; C8010403 v_interp_p1_f32 v1, v2, 1, 1, [m0] ; C8040502 v_interp_p2_f32 v1, [v1], v3, 1, 1, [m0] ; C8050503 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_b32 s0, s0, s19 ; 87001300 s_and_b64 exec, exec, s[6:7] ; 87FE067E image_sample v[5:8], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030500 v_interp_p1_f32 v0, v2, 0, 0, [m0] ; C8000002 v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; C8010003 v_interp_p1_f32 v1, v2, 1, 0, [m0] ; C8040102 v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; C8050103 v_interp_p1_f32 v4, v2, 2, 0, [m0] ; C8100202 v_interp_p2_f32 v4, [v4], v3, 2, 0, [m0] ; C8110203 v_interp_p1_f32 v2, v2, 3, 0, [m0] ; C8080302 v_interp_p2_f32 v2, [v2], v3, 3, 0, [m0] ; C8090303 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v3, v8, v2 ; 10060508 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mov_b32_e32 v2, v4 ; 7E040304 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 132 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** Memory-mapped registers: GRBM_STATUS <- ME0PIPE0_CMDFIFO_AVAIL = 8 SRBM_RQ_PENDING = 1 ME0PIPE0_CF_RQ_PENDING = 0 ME0PIPE0_PF_RQ_PENDING = 0 GDS_DMA_RQ_PENDING = 0 DB_CLEAN = 1 CB_CLEAN = 1 TA_BUSY = 0 GDS_BUSY = 0 WD_BUSY_NO_DMA = 0 VGT_BUSY = 0 IA_BUSY_NO_DMA = 0 IA_BUSY = 0 SX_BUSY = 0 WD_BUSY = 0 SPI_BUSY = 0 BCI_BUSY = 0 SC_BUSY = 0 PA_BUSY = 0 DB_BUSY = 0 CP_COHERENCY_BUSY = 0 CP_BUSY = 0 CB_BUSY = 0 GUI_ACTIVE = 0 Last 60 lines of dmesg: