*************************************** * YoYo Games Linux Runner V1.3 * *************************************** CommandLine: -game game.unx ExeName= /home/murks/.local/share/Steam/steamapps/common/Crashlands/runner MemoryManager allocated: 1723148 INI DisplayName=Crashlands SavePrePend /home/murks/.config/Crashlands/ GAMEPAD: Initialising Ubuntu support Attempting to set gamepadcount to 4 GAMEPAD: Enumerating 3 GAMEPAD: device name Microsoft X-Box 360 pad GAMEPAD: buttons - 11, axes - 8 Steam being initialised with appId 391730 Steam_Init Error: Failed to load libsteam_api.so: libsteam_api.so: cannot open shared object file: No such file or directory Failed LoadSteamLib() call: Error loading libsteam_api.so SteamInit failed: Error loading libsteam_api.so Display Size(Pixels): 1920,1080 GameDisplayName=Crashlands Win #1 XF86VidModeExtension-Version 2.2 Got Doublebuffered Visual! glX-Version 1.4 Icon: w=120 h=120 sw=1920 wh=1080 WindowCentre: 448,156 Depth 24 Congrats, you have Direct Rendering! sync = 0 **** GLX Extensions *** GLX_ARB_create_context GLX_ARB_create_context_profile GLX_ARB_create_context_robustness GLX_ARB_fbconfig_float GLX_ARB_framebuffer_sRGB GLX_ARB_get_proc_address GLX_ARB_multisample GLX_EXT_import_context GLX_EXT_visual_info GLX_EXT_visual_rating GLX_EXT_fbconfig_packed_float GLX_EXT_framebuffer_sRGB GLX_EXT_create_context_es2_profile GLX_EXT_create_context_es_profile GLX_MESA_copy_sub_buffer GLX_MESA_multithread_makecurrent GLX_MESA_query_renderer GLX_MESA_swap_control GLX_OML_swap_method GLX_OML_sync_control GLX_SGI_make_current_read GLX_SGI_swap_control GLX_SGI_video_sync GLX_SGIS_multisample GLX_SGIX_fbconfig GLX_SGIX_pbuffer GLX_SGIX_visual_select_group GLX_EXT_texture_from_pixmap GLX_INTEL_swap_event GLX_EXT_buffer_age Checking for GLX_EXT_swap_control Checking for GLX_SGI_swap_control Vsync: GLX_SGI DOUBLE BUFFERED OpenGL: version string 3.0 Mesa 17.1.8 OpenGL: vendor string X.Org OpenGL GLSL: version string 1.30 Extensions: GL_ARB_multisample GL_EXT_abgr GL_EXT_bgra GL_EXT_blend_color GL_EXT_blend_minmax GL_EXT_blend_subtract GL_EXT_copy_texture GL_EXT_polygon_offset GL_EXT_subtexture GL_EXT_texture_object GL_EXT_vertex_array GL_EXT_compiled_vertex_array GL_EXT_texture GL_EXT_texture3D GL_IBM_rasterpos_clip GL_ARB_point_parameters GL_EXT_draw_range_elements GL_EXT_packed_pixels GL_EXT_point_parameters GL_EXT_rescale_normal GL_EXT_separate_specular_color GL_EXT_texture_edge_clamp GL_SGIS_generate_mipmap GL_SGIS_texture_border_clamp GL_SGIS_texture_edge_clamp GL_SGIS_texture_lod GL_ARB_framebuffer_sRGB GL_ARB_multitexture GL_EXT_framebuffer_sRGB GL_IBM_multimode_draw_arrays GL_IBM_texture_mirrored_repeat GL_ARB_texture_cube_map GL_ARB_texture_env_add GL_ARB_transpose_matrix GL_EXT_blend_func_separate GL_EXT_fog_coord GL_EXT_multi_draw_arrays GL_EXT_secondary_color GL_EXT_texture_env_add GL_EXT_texture_filter_anisotropic GL_EXT_texture_lod_bias GL_INGR_blend_func_separate GL_NV_blend_square GL_NV_light_max_exponent GL_NV_texgen_reflection GL_NV_texture_env_combine4 GL_S3_s3tc GL_SUN_multi_draw_arrays GL_ARB_texture_border_clamp GL_ARB_texture_compression GL_EXT_framebuffer_object GL_EXT_texture_compression_s3tc GL_EXT_texture_env_combine GL_EXT_texture_env_dot3 GL_MESA_window_pos GL_NV_packed_depth_stencil GL_NV_texture_rectangle GL_ARB_depth_texture GL_ARB_occlusion_query GL_ARB_shadow GL_ARB_texture_env_combine GL_ARB_texture_env_crossbar GL_ARB_texture_env_dot3 GL_ARB_texture_mirrored_repeat GL_ARB_window_pos GL_ATI_fragment_shader GL_EXT_stencil_two_side GL_EXT_texture_cube_map GL_NV_depth_clamp GL_NV_fog_distance GL_APPLE_packed_pixels GL_APPLE_vertex_array_object GL_ARB_draw_buffers GL_ARB_fragment_program GL_ARB_fragment_shader GL_ARB_shader_objects GL_ARB_vertex_program GL_ARB_vertex_shader GL_ATI_draw_buffers GL_ATI_texture_env_combine3 GL_ATI_texture_float GL_EXT_depth_bounds_test GL_EXT_shadow_funcs GL_EXT_stencil_wrap GL_MESA_pack_invert GL_NV_primitive_restart GL_ARB_depth_clamp GL_ARB_fragment_program_shadow GL_ARB_half_float_pixel GL_ARB_occlusion_query2 GL_ARB_point_sprite GL_ARB_shading_language_100 GL_ARB_sync GL_ARB_texture_non_power_of_two GL_ARB_vertex_buffer_object GL_ATI_blend_equation_separate GL_EXT_blend_equation_separate GL_OES_read_format GL_ARB_color_buffer_float GL_ARB_pixel_buffer_object GL_ARB_texture_compression_rgtc GL_ARB_texture_float GL_ARB_texture_rectangle GL_ATI_texture_compression_3dc GL_EXT_packed_float GL_EXT_pixel_buffer_object GL_EXT_texture_compression_dxt1 GL_EXT_texture_compression_rgtc GL_EXT_texture_mirror_clamp GL_EXT_texture_rectangle GL_EXT_texture_sRGB GL_EXT_texture_shared_exponent GL_ARB_framebuffer_object GL_EXT_framebuffer_blit GL_EXT_framebuffer_multisample GL_EXT_packed_depth_stencil GL_ARB_vertex_array_object GL_ATI_separate_stencil GL_ATI_texture_mirror_once GL_EXT_draw_buffers2 GL_EXT_draw_instanced GL_EXT_gpu_program_parameters GL_EXT_texture_array GL_EXT_texture_compression_latc GL_EXT_texture_integer GL_EXT_texture_sRGB_decode GL_EXT_timer_query GL_OES_EGL_image GL_AMD_performance_monitor GL_ARB_copy_buffer GL_ARB_depth_buffer_float GL_ARB_draw_instanced GL_ARB_half_float_vertex GL_ARB_instanced_arrays GL_ARB_map_buffer_range GL_ARB_texture_rg GL_ARB_texture_swizzle GL_ARB_vertex_array_bgra GL_EXT_texture_swizzle GL_EXT_vertex_array_bgra GL_NV_conditional_render GL_AMD_conservative_depth GL_AMD_draw_buffers_blend GL_AMD_seamless_cubemap_per_texture GL_AMD_shader_stencil_export GL_ARB_ES2_compatibility GL_ARB_blend_func_extended GL_ARB_debug_output GL_ARB_draw_buffers_blend GL_ARB_draw_elements_base_vertex GL_ARB_explicit_attrib_location GL_ARB_fragment_coord_conventions GL_ARB_provoking_vertex GL_ARB_sample_shading GL_ARB_sampler_objects GL_ARB_seamless_cube_map GL_ARB_shader_stencil_export GL_ARB_shader_texture_lod GL_ARB_texture_cube_map_array GL_ARB_texture_gather GL_ARB_texture_multisample GL_ARB_texture_query_lod GL_ARB_texture_rgb10_a2ui GL_ARB_uniform_buffer_object GL_ARB_vertex_type_2_10_10_10_rev GL_ATI_meminfo GL_EXT_provoking_vertex GL_EXT_texture_snorm GL_MESA_texture_signed_rgba GL_NV_texture_barrier GL_ARB_get_program_binary GL_ARB_robustness GL_ARB_separate_shader_objects GL_ARB_shader_bit_encoding GL_ARB_shader_precision GL_ARB_texture_compression_bptc GL_ARB_timer_query GL_ARB_transform_feedback2 GL_ARB_transform_feedback3 GL_NV_vdpau_interop GL_ANGLE_texture_compression_dxt3 GL_ANGLE_texture_compression_dxt5 GL_ARB_base_instance GL_ARB_compressed_texture_pixel_storage GL_ARB_conservative_depth GL_ARB_internalformat_query GL_ARB_map_buffer_alignment GL_ARB_shader_atomic_counters GL_ARB_shader_image_load_store GL_ARB_shading_language_420pack GL_ARB_shading_language_packing GL_ARB_texture_storage GL_ARB_transform_feedback_instanced GL_EXT_framebuffer_multisample_blit_scaled GL_EXT_transform_feedback GL_AMD_shader_trinary_minmax GL_ARB_ES3_compatibility GL_ARB_arrays_of_arrays GL_ARB_clear_buffer_object GL_ARB_compute_shader GL_ARB_copy_image GL_ARB_explicit_uniform_location GL_ARB_framebuffer_no_attachments GL_ARB_invalidate_subdata GL_ARB_program_interface_query GL_ARB_robust_buffer_access_behavior GL_ARB_shader_image_size GL_ARB_shader_storage_buffer_object GL_ARB_stencil_texturing GL_ARB_texture_query_levels GL_ARB_texture_storage_multisample GL_ARB_texture_view GL_ARB_vertex_attrib_binding GL_KHR_debug GL_KHR_robustness GL_AMD_pinned_memory GL_ARB_buffer_storage GL_ARB_clear_texture GL_ARB_compute_variable_group_size GL_ARB_internalformat_query2 GL_ARB_multi_bind GL_ARB_query_buffer_object GL_ARB_seamless_cubemap_per_texture GL_ARB_shader_draw_parameters GL_ARB_shader_group_vote GL_ARB_texture_mirror_clamp_to_edge GL_ARB_texture_stencil8 GL_ARB_vertex_type_10f_11f_11f_rev GL_EXT_shader_integer_mix GL_NVX_gpu_memory_info GL_ARB_clip_control GL_ARB_conditional_render_inverted GL_ARB_cull_distance GL_ARB_derivative_control GL_ARB_get_texture_sub_image GL_ARB_pipeline_statistics_query GL_ARB_shader_texture_image_samples GL_ARB_sparse_buffer GL_ARB_texture_barrier GL_EXT_polygon_offset_clamp GL_KHR_context_flush_control GL_KHR_robust_buffer_access_behavior GL_ARB_shader_atomic_counter_ops GL_ARB_shader_clock GL_MESA_shader_integer_functions Anisotropic filtering supported, max aniso 16 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 1 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32, float, float, float, float) #0 { main_body: %27 = bitcast float %5 to i32 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 } attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV OUT[1], IN[1] 5: END radeonsi: Compiling shader 2 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) #0 { main_body: %16 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %17 = load <4 x i32>, <4 x i32> addrspace(2)* %16, align 16, !invariant.load !0 %18 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %17, i32 %14, i32 0, i1 false, i1 false) #2 %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = extractelement <4 x float> %18, i32 3 %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %24 = bitcast <16 x i8> addrspace(2)* %23 to <4 x i32> addrspace(2)* %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !invariant.load !0 %26 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %25, i32 %15, i32 0, i1 false, i1 false) #2 %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = extractelement <4 x float> %26, i32 2 %30 = extractelement <4 x float> %26, i32 3 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !invariant.load !0 %33 = call nsz float @llvm.SI.load.const(<16 x i8> %32, i32 0) %34 = fmul nsz float %19, %33 %35 = call nsz float @llvm.SI.load.const(<16 x i8> %32, i32 4) %36 = fmul nsz float %19, %35 %37 = call nsz float @llvm.SI.load.const(<16 x i8> %32, i32 8) %38 = fmul nsz float %19, %37 %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !invariant.load !0 %41 = call nsz float @llvm.SI.load.const(<16 x i8> %40, i32 12) %42 = fmul nsz float %19, %41 %43 = call nsz float @llvm.SI.load.const(<16 x i8> %40, i32 16) %44 = fmul nsz float %20, %43 %45 = fadd nsz float %44, %34 %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !invariant.load !0 %48 = call nsz float @llvm.SI.load.const(<16 x i8> %47, i32 20) %49 = fmul nsz float %20, %48 %50 = fadd nsz float %49, %36 %51 = call nsz float @llvm.SI.load.const(<16 x i8> %47, i32 24) %52 = fmul nsz float %20, %51 %53 = fadd nsz float %52, %38 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !invariant.load !0 %56 = call nsz float @llvm.SI.load.const(<16 x i8> %55, i32 28) %57 = fmul nsz float %20, %56 %58 = fadd nsz float %57, %42 %59 = call nsz float @llvm.SI.load.const(<16 x i8> %55, i32 32) %60 = fmul nsz float %21, %59 %61 = fadd nsz float %60, %45 %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !invariant.load !0 %64 = call nsz float @llvm.SI.load.const(<16 x i8> %63, i32 36) %65 = fmul nsz float %21, %64 %66 = fadd nsz float %65, %50 %67 = call nsz float @llvm.SI.load.const(<16 x i8> %63, i32 40) %68 = fmul nsz float %21, %67 %69 = fadd nsz float %68, %53 %70 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !invariant.load !0 %72 = call nsz float @llvm.SI.load.const(<16 x i8> %71, i32 44) %73 = fmul nsz float %21, %72 %74 = fadd nsz float %73, %58 %75 = call nsz float @llvm.SI.load.const(<16 x i8> %71, i32 48) %76 = fmul nsz float %22, %75 %77 = fadd nsz float %76, %61 %78 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !invariant.load !0 %80 = call nsz float @llvm.SI.load.const(<16 x i8> %79, i32 52) %81 = fmul nsz float %22, %80 %82 = fadd nsz float %81, %66 %83 = call nsz float @llvm.SI.load.const(<16 x i8> %79, i32 56) %84 = fmul nsz float %22, %83 %85 = fadd nsz float %84, %69 %86 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !invariant.load !0 %88 = call nsz float @llvm.SI.load.const(<16 x i8> %87, i32 60) %89 = fmul nsz float %22, %88 %90 = fadd nsz float %89, %74 %91 = and i32 %9, 1 %92 = icmp eq i32 %91, 0 br i1 %92, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %93 = call nsz float @llvm.AMDGPU.clamp.(float %27, float 0.000000e+00, float 1.000000e+00) %94 = call nsz float @llvm.AMDGPU.clamp.(float %28, float 0.000000e+00, float 1.000000e+00) %95 = call nsz float @llvm.AMDGPU.clamp.(float %29, float 0.000000e+00, float 1.000000e+00) %96 = call nsz float @llvm.AMDGPU.clamp.(float %30, float 0.000000e+00, float 1.000000e+00) br label %endif-block endif-block: ; preds = %main_body, %if-true-block %.03 = phi float [ %96, %if-true-block ], [ %30, %main_body ] %.02 = phi float [ %95, %if-true-block ], [ %29, %main_body ] %.01 = phi float [ %94, %if-true-block ], [ %28, %main_body ] %.0 = phi float [ %93, %if-true-block ], [ %27, %main_body ] %97 = bitcast i32 %12 to float %98 = insertvalue <{ float, float, float }> undef, float %97, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %.0, float %.01, float %.02, float %.03) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %77, float %82, float %85, float %90) ret <{ float, float, float }> %98 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 3 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %6) #1 %24 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %6) #1 %25 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %6) #1 %26 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %6) #1 %27 = bitcast float %5 to i32 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 } ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END radeonsi: Compiling shader 4 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) #0 { main_body: %16 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %17 = load <4 x i32>, <4 x i32> addrspace(2)* %16, align 16, !invariant.load !0 %18 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %17, i32 %14, i32 0, i1 false, i1 false) #3 %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = extractelement <4 x float> %18, i32 3 %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %24 = bitcast <16 x i8> addrspace(2)* %23 to <4 x i32> addrspace(2)* %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !invariant.load !0 %26 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %25, i32 %15, i32 0, i1 false, i1 false) #3 %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = extractelement <4 x float> %26, i32 2 %30 = extractelement <4 x float> %26, i32 3 %31 = bitcast i32 %12 to float %32 = insertvalue <{ float, float, float }> undef, float %31, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float %28, float %29, float %30) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %19, float %20, float %21, float %22) ret <{ float, float, float }> %32 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind } attributes #3 = { nounwind readnone } !0 = !{} radeonsi: Compiling shader 5 Vertex Shader Prolog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> @vs_prolog(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> undef, i32 %0, 0 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %20, i32 %1, 1 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %21, i32 %2, 2 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %22, i32 %3, 3 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %23, i32 %4, 4 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %24, i32 %5, 5 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %25, i32 %6, 6 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %26, i32 %7, 7 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %27, i32 %8, 8 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %28, i32 %9, 9 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %29, i32 %10, 10 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %30, i32 %11, 11 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %31, i32 %12, 12 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %32, i32 %13, 13 %34 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %33, i32 %14, 14 %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %34, i32 %15, 15 %36 = bitcast i32 %16 to float %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %35, float %36, 16 %38 = bitcast i32 %17 to float %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %37, float %38, 17 %40 = bitcast i32 %18 to float %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %39, float %40, 18 %42 = bitcast i32 %19 to float %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %41, float %42, 19 %44 = add i32 %16, %12 %45 = bitcast i32 %44 to float %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %43, float %45, 20 %47 = add i32 %16, %12 %48 = bitcast i32 %47 to float %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %46, float %48, 21 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %49 } attributes #0 = { "no-signed-zeros-fp-math"="true" } radeonsi: Compiling shader 6 Vertex Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs void @vs_epilog() #0 { main_body: ret void } attributes #0 = { "no-signed-zeros-fp-math"="true" } SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} part.vs.epilog.export_prim_id = 0 as_es = 0 as_ls = 0 mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} opt.hw_vs.kill_outputs = 0x0 opt.hw_vs.kill_outputs2 = 0x0 opt.hw_vs.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 3208000C v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[4:7], s[10:11], 0x10 ; C00A0105 00000010 s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C00A0005 00000000 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 s_nop 0 ; BF800000 buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 s_waitcnt vmcnt(1) ; BF8C0F71 exp param0 v10, v11, v12, v13 ; C400020F 0D0C0B0A s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v6, v7, v8, v9 done ; C40008CF 09080706 s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** radeonsi: Compiling shader 7 Fragment Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = call i32 @llvm.SI.packf16(float %6, float %7) %21 = bitcast i32 %20 to float %22 = call i32 @llvm.SI.packf16(float %8, float %9) %23 = bitcast i32 %22 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %21, float %23, float undef, float undef) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2 attributes #0 = { "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b32 m0, s11 ; BEFC000B v_interp_mov_f32 v0, p0, attr0.x ; D4020002 v_interp_mov_f32 v1, p0, attr0.y ; D4060102 v_interp_mov_f32 v2, p0, attr0.z ; D40A0202 v_interp_mov_f32 v3, p0, attr0.w ; D40E0302 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 exp mrt0 v0, v1, v0, v0 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 48 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** Texture #1 16,16 Texture #2 16,16 Texture #1 16,16 Texture #2 16,16 finished(2)!! VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].xy, GENERIC[1] DCL CONST[0..19] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[16], IN[0].xxxx 1: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0] 3: ADD OUT[0], TEMP[0], CONST[19] 4: MOV OUT[1].x, IMM[0].xxxx 5: MOV OUT[2], IN[2] 6: MOV OUT[3].xy, IN[1].xyxx 7: END radeonsi: Compiling shader 8 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %17 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %18 = load <4 x i32>, <4 x i32> addrspace(2)* %17, align 16, !invariant.load !0 %19 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %18, i32 %14, i32 0, i1 false, i1 false) #2 %20 = extractelement <4 x float> %19, i32 0 %21 = extractelement <4 x float> %19, i32 1 %22 = extractelement <4 x float> %19, i32 2 %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %24 = bitcast <16 x i8> addrspace(2)* %23 to <4 x i32> addrspace(2)* %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !invariant.load !0 %26 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %25, i32 %15, i32 0, i1 false, i1 false) #2 %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %30 = bitcast <16 x i8> addrspace(2)* %29 to <4 x i32> addrspace(2)* %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !invariant.load !0 %32 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %31, i32 %16, i32 0, i1 false, i1 false) #2 %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !invariant.load !0 %39 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 256) %40 = fmul nsz float %39, %20 %41 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 260) %42 = fmul nsz float %41, %20 %43 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 264) %44 = fmul nsz float %43, %20 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 268) %48 = fmul nsz float %47, %20 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 272) %50 = fmul nsz float %49, %21 %51 = fadd nsz float %50, %40 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !invariant.load !0 %54 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 276) %55 = fmul nsz float %54, %21 %56 = fadd nsz float %55, %42 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 280) %58 = fmul nsz float %57, %21 %59 = fadd nsz float %58, %44 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !invariant.load !0 %62 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 284) %63 = fmul nsz float %62, %21 %64 = fadd nsz float %63, %48 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 288) %66 = fmul nsz float %65, %22 %67 = fadd nsz float %66, %51 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !invariant.load !0 %70 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 292) %71 = fmul nsz float %70, %22 %72 = fadd nsz float %71, %56 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 296) %74 = fmul nsz float %73, %22 %75 = fadd nsz float %74, %59 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !invariant.load !0 %78 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 300) %79 = fmul nsz float %78, %22 %80 = fadd nsz float %79, %64 %81 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 304) %82 = fadd nsz float %67, %81 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !invariant.load !0 %85 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 308) %86 = fadd nsz float %72, %85 %87 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 312) %88 = fadd nsz float %75, %87 %89 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 316) %90 = fadd nsz float %80, %89 %91 = bitcast i32 %12 to float %92 = insertvalue <{ float, float, float }> undef, float %91, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %27, float %28, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %82, float %86, float %88, float %90) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %92 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].xy, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL OUT[0], IN[0], TEMP[0] 3: END radeonsi: Compiling shader 9 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = bitcast <2 x i32> %8 to <2 x float> %24 = extractelement <2 x float> %23, i32 0 %25 = extractelement <2 x float> %23, i32 1 %26 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 0, i32 1, i32 %6) #1 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %25, i32 0, i32 1, i32 %6) #1 %28 = bitcast <2 x i32> %8 to <2 x float> %29 = extractelement <2 x float> %28, i32 0 %30 = extractelement <2 x float> %28, i32 1 %31 = call nsz float @llvm.amdgcn.interp.p1(float %29, i32 1, i32 1, i32 %6) #1 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %30, i32 1, i32 1, i32 %6) #1 %33 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %34 = load <8 x i32>, <8 x i32> addrspace(2)* %33, align 32, !invariant.load !0 %35 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %36 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %35, i64 0, i64 3, !amdgpu.uniform !0 %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !invariant.load !0 %38 = bitcast float %27 to i32 %39 = bitcast float %32 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = bitcast <2 x i32> %41 to <2 x float> %43 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %42, <8 x i32> %34, <4 x i32> %37, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = bitcast <2 x i32> %8 to <2 x float> %49 = extractelement <2 x float> %48, i32 0 %50 = extractelement <2 x float> %48, i32 1 %51 = call nsz float @llvm.amdgcn.interp.p1(float %49, i32 0, i32 0, i32 %6) #1 %52 = call nsz float @llvm.amdgcn.interp.p2(float %51, float %50, i32 0, i32 0, i32 %6) #1 %53 = fmul nsz float %52, %44 %54 = bitcast <2 x i32> %8 to <2 x float> %55 = extractelement <2 x float> %54, i32 0 %56 = extractelement <2 x float> %54, i32 1 %57 = call nsz float @llvm.amdgcn.interp.p1(float %55, i32 1, i32 0, i32 %6) #1 %58 = call nsz float @llvm.amdgcn.interp.p2(float %57, float %56, i32 1, i32 0, i32 %6) #1 %59 = fmul nsz float %58, %45 %60 = bitcast <2 x i32> %8 to <2 x float> %61 = extractelement <2 x float> %60, i32 0 %62 = extractelement <2 x float> %60, i32 1 %63 = call nsz float @llvm.amdgcn.interp.p1(float %61, i32 2, i32 0, i32 %6) #1 %64 = call nsz float @llvm.amdgcn.interp.p2(float %63, float %62, i32 2, i32 0, i32 %6) #1 %65 = fmul nsz float %64, %46 %66 = bitcast <2 x i32> %8 to <2 x float> %67 = extractelement <2 x float> %66, i32 0 %68 = extractelement <2 x float> %66, i32 1 %69 = call nsz float @llvm.amdgcn.interp.p1(float %67, i32 3, i32 0, i32 %6) #1 %70 = call nsz float @llvm.amdgcn.interp.p2(float %69, float %68, i32 3, i32 0, i32 %6) #1 %71 = fmul nsz float %70, %47 %72 = bitcast float %5 to i32 %73 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %72, 10 %74 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %73, float %53, 11 %75 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %74, float %59, 12 %76 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %75, float %65, 13 %77 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %76, float %71, 14 %78 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %77, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %78 } ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } !0 = !{} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].xyz, GENERIC[1] DCL CONST[0..21] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[16], IN[0].xxxx 1: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0] 3: ADD OUT[0], TEMP[0], CONST[19] 4: MOV OUT[1].x, IMM[0].xxxx 5: MOV OUT[2], IN[2] 6: MUL TEMP[0], CONST[12], IN[0].xxxx 7: MAD TEMP[0], CONST[13], IN[0].yyyy, TEMP[0] 8: MAD TEMP[0], CONST[14], IN[0].zzzz, TEMP[0] 9: ADD TEMP[0].z, TEMP[0], CONST[15] 10: ADD TEMP[0].x, TEMP[0].zzzz, -CONST[20].xxxx 11: MUL TEMP[0].x, TEMP[0].xxxx, CONST[21].xxxx 12: MOV OUT[3].z, TEMP[0].xxxx 13: MOV OUT[3].xy, IN[1].xyxx 14: END radeonsi: Compiling shader 10 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %17 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %18 = load <4 x i32>, <4 x i32> addrspace(2)* %17, align 16, !invariant.load !0 %19 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %18, i32 %14, i32 0, i1 false, i1 false) #2 %20 = extractelement <4 x float> %19, i32 0 %21 = extractelement <4 x float> %19, i32 1 %22 = extractelement <4 x float> %19, i32 2 %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %24 = bitcast <16 x i8> addrspace(2)* %23 to <4 x i32> addrspace(2)* %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !invariant.load !0 %26 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %25, i32 %15, i32 0, i1 false, i1 false) #2 %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %30 = bitcast <16 x i8> addrspace(2)* %29 to <4 x i32> addrspace(2)* %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !invariant.load !0 %32 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %31, i32 %16, i32 0, i1 false, i1 false) #2 %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !invariant.load !0 %39 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 256) %40 = fmul nsz float %39, %20 %41 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 260) %42 = fmul nsz float %41, %20 %43 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 264) %44 = fmul nsz float %43, %20 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 268) %48 = fmul nsz float %47, %20 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 272) %50 = fmul nsz float %49, %21 %51 = fadd nsz float %50, %40 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !invariant.load !0 %54 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 276) %55 = fmul nsz float %54, %21 %56 = fadd nsz float %55, %42 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 280) %58 = fmul nsz float %57, %21 %59 = fadd nsz float %58, %44 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !invariant.load !0 %62 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 284) %63 = fmul nsz float %62, %21 %64 = fadd nsz float %63, %48 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 288) %66 = fmul nsz float %65, %22 %67 = fadd nsz float %66, %51 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !invariant.load !0 %70 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 292) %71 = fmul nsz float %70, %22 %72 = fadd nsz float %71, %56 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 296) %74 = fmul nsz float %73, %22 %75 = fadd nsz float %74, %59 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !invariant.load !0 %78 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 300) %79 = fmul nsz float %78, %22 %80 = fadd nsz float %79, %64 %81 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 304) %82 = fadd nsz float %67, %81 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !invariant.load !0 %85 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 308) %86 = fadd nsz float %72, %85 %87 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 312) %88 = fadd nsz float %75, %87 %89 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 316) %90 = fadd nsz float %80, %89 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 200) %94 = fmul nsz float %93, %20 %95 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 216) %96 = fmul nsz float %95, %21 %97 = fadd nsz float %96, %94 %98 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !invariant.load !0 %100 = call nsz float @llvm.SI.load.const(<16 x i8> %99, i32 232) %101 = fmul nsz float %100, %22 %102 = fadd nsz float %101, %97 %103 = call nsz float @llvm.SI.load.const(<16 x i8> %99, i32 248) %104 = fadd nsz float %102, %103 %105 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !invariant.load !0 %107 = call nsz float @llvm.SI.load.const(<16 x i8> %106, i32 320) %108 = fsub nsz float %104, %107 %109 = call nsz float @llvm.SI.load.const(<16 x i8> %106, i32 336) %110 = fmul nsz float %108, %109 %111 = bitcast i32 %12 to float %112 = insertvalue <{ float, float, float }> undef, float %111, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %27, float %28, float %110, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %82, float %86, float %88, float %90) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %112 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].xyz, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[0], IN[0], TEMP[0] 3: MOV_SAT TEMP[1].x, IN[1].zzzz 4: LRP TEMP[1].xyz, TEMP[1].xxxx, CONST[1].xyzz, TEMP[0].xyzz 5: MOV TEMP[1].w, TEMP[0].wwww 6: MOV OUT[0], TEMP[1] 7: END radeonsi: Compiling shader 11 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = bitcast <2 x i32> %8 to <2 x float> %24 = extractelement <2 x float> %23, i32 0 %25 = extractelement <2 x float> %23, i32 1 %26 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 0, i32 1, i32 %6) #1 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %25, i32 0, i32 1, i32 %6) #1 %28 = bitcast <2 x i32> %8 to <2 x float> %29 = extractelement <2 x float> %28, i32 0 %30 = extractelement <2 x float> %28, i32 1 %31 = call nsz float @llvm.amdgcn.interp.p1(float %29, i32 1, i32 1, i32 %6) #1 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %30, i32 1, i32 1, i32 %6) #1 %33 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %34 = load <8 x i32>, <8 x i32> addrspace(2)* %33, align 32, !invariant.load !0 %35 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %36 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %35, i64 0, i64 3, !amdgpu.uniform !0 %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !invariant.load !0 %38 = bitcast float %27 to i32 %39 = bitcast float %32 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = bitcast <2 x i32> %41 to <2 x float> %43 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %42, <8 x i32> %34, <4 x i32> %37, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = bitcast <2 x i32> %8 to <2 x float> %49 = extractelement <2 x float> %48, i32 0 %50 = extractelement <2 x float> %48, i32 1 %51 = call nsz float @llvm.amdgcn.interp.p1(float %49, i32 0, i32 0, i32 %6) #1 %52 = call nsz float @llvm.amdgcn.interp.p2(float %51, float %50, i32 0, i32 0, i32 %6) #1 %53 = fmul nsz float %52, %44 %54 = bitcast <2 x i32> %8 to <2 x float> %55 = extractelement <2 x float> %54, i32 0 %56 = extractelement <2 x float> %54, i32 1 %57 = call nsz float @llvm.amdgcn.interp.p1(float %55, i32 1, i32 0, i32 %6) #1 %58 = call nsz float @llvm.amdgcn.interp.p2(float %57, float %56, i32 1, i32 0, i32 %6) #1 %59 = fmul nsz float %58, %45 %60 = bitcast <2 x i32> %8 to <2 x float> %61 = extractelement <2 x float> %60, i32 0 %62 = extractelement <2 x float> %60, i32 1 %63 = call nsz float @llvm.amdgcn.interp.p1(float %61, i32 2, i32 0, i32 %6) #1 %64 = call nsz float @llvm.amdgcn.interp.p2(float %63, float %62, i32 2, i32 0, i32 %6) #1 %65 = fmul nsz float %64, %46 %66 = bitcast <2 x i32> %8 to <2 x float> %67 = extractelement <2 x float> %66, i32 0 %68 = extractelement <2 x float> %66, i32 1 %69 = call nsz float @llvm.amdgcn.interp.p1(float %67, i32 3, i32 0, i32 %6) #1 %70 = call nsz float @llvm.amdgcn.interp.p2(float %69, float %68, i32 3, i32 0, i32 %6) #1 %71 = fmul nsz float %70, %47 %72 = bitcast <2 x i32> %8 to <2 x float> %73 = extractelement <2 x float> %72, i32 0 %74 = extractelement <2 x float> %72, i32 1 %75 = call nsz float @llvm.amdgcn.interp.p1(float %73, i32 2, i32 1, i32 %6) #1 %76 = call nsz float @llvm.amdgcn.interp.p2(float %75, float %74, i32 2, i32 1, i32 %6) #1 %77 = call nsz float @llvm.AMDGPU.clamp.(float %76, float 0.000000e+00, float 1.000000e+00) %78 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !invariant.load !0 %80 = call nsz float @llvm.SI.load.const(<16 x i8> %79, i32 16) %81 = fsub nsz float 1.000000e+00, %77 %82 = fmul nsz float %80, %77 %83 = fmul nsz float %53, %81 %84 = fadd nsz float %82, %83 %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !invariant.load !0 %87 = call nsz float @llvm.SI.load.const(<16 x i8> %86, i32 20) %88 = fsub nsz float 1.000000e+00, %77 %89 = fmul nsz float %87, %77 %90 = fmul nsz float %59, %88 %91 = fadd nsz float %89, %90 %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !invariant.load !0 %94 = call nsz float @llvm.SI.load.const(<16 x i8> %93, i32 24) %95 = fsub nsz float 1.000000e+00, %77 %96 = fmul nsz float %94, %77 %97 = fmul nsz float %65, %95 %98 = fadd nsz float %96, %97 %99 = bitcast float %5 to i32 %100 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %99, 10 %101 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %100, float %84, 11 %102 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %101, float %91, 12 %103 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %102, float %98, 13 %104 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %103, float %71, 14 %105 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %104, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %105 } ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].xy, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[0], IN[0], TEMP[0] 3: FSGE TEMP[1].x, CONST[1].xxxx, TEMP[0].wwww 4: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 5: KILL_IF -TEMP[1].xxxx 6: MOV OUT[0], TEMP[0] 7: END radeonsi: Compiling shader 12 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = bitcast <2 x i32> %8 to <2 x float> %24 = extractelement <2 x float> %23, i32 0 %25 = extractelement <2 x float> %23, i32 1 %26 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 0, i32 0, i32 %6) #1 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %25, i32 0, i32 0, i32 %6) #1 %28 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 1, i32 0, i32 %6) #1 %29 = call nsz float @llvm.amdgcn.interp.p2(float %28, float %25, i32 1, i32 0, i32 %6) #1 %30 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 2, i32 0, i32 %6) #1 %31 = call nsz float @llvm.amdgcn.interp.p2(float %30, float %25, i32 2, i32 0, i32 %6) #1 %32 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 3, i32 0, i32 %6) #1 %33 = call nsz float @llvm.amdgcn.interp.p2(float %32, float %25, i32 3, i32 0, i32 %6) #1 %34 = bitcast <2 x i32> %8 to <2 x float> %35 = extractelement <2 x float> %34, i32 0 %36 = extractelement <2 x float> %34, i32 1 %37 = call nsz float @llvm.amdgcn.interp.p1(float %35, i32 0, i32 1, i32 %6) #1 %38 = call nsz float @llvm.amdgcn.interp.p2(float %37, float %36, i32 0, i32 1, i32 %6) #1 %39 = call nsz float @llvm.amdgcn.interp.p1(float %35, i32 1, i32 1, i32 %6) #1 %40 = call nsz float @llvm.amdgcn.interp.p2(float %39, float %36, i32 1, i32 1, i32 %6) #1 %41 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !invariant.load !0 %43 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %44 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %43, i64 0, i64 3, !amdgpu.uniform !0 %45 = load <4 x i32>, <4 x i32> addrspace(2)* %44, align 16, !invariant.load !0 %46 = bitcast float %38 to i32 %47 = bitcast float %40 to i32 %48 = insertelement <2 x i32> undef, i32 %46, i32 0 %49 = insertelement <2 x i32> %48, i32 %47, i32 1 %50 = bitcast <2 x i32> %49 to <2 x float> %51 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %50, <8 x i32> %42, <4 x i32> %45, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = extractelement <4 x float> %51, i32 3 %56 = fmul nsz float %27, %52 %57 = fmul nsz float %29, %53 %58 = fmul nsz float %31, %54 %59 = fmul nsz float %33, %55 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !invariant.load !0 %62 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 16) %63 = fcmp nsz oge float %62, %59 %64 = select i1 %63, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %64) %65 = bitcast float %5 to i32 %66 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %65, 10 %67 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %66, float %56, 11 %68 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %67, float %57, 12 %69 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %68, float %58, 13 %70 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %69, float %59, 14 %71 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %70, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %71 } ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind declare void @llvm.AMDGPU.kill(float) #3 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } attributes #3 = { nounwind } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].xyz, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1..2] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[0], IN[0], TEMP[0] 3: FSGE TEMP[1].x, CONST[2].xxxx, TEMP[0].wwww 4: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 5: KILL_IF -TEMP[1].xxxx 6: MOV_SAT TEMP[1].x, IN[1].zzzz 7: LRP TEMP[1].xyz, TEMP[1].xxxx, CONST[1].xyzz, TEMP[0].xyzz 8: MOV TEMP[1].w, TEMP[0].wwww 9: MOV OUT[0], TEMP[1] 10: END radeonsi: Compiling shader 13 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = bitcast <2 x i32> %8 to <2 x float> %24 = extractelement <2 x float> %23, i32 0 %25 = extractelement <2 x float> %23, i32 1 %26 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 0, i32 0, i32 %6) #1 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %25, i32 0, i32 0, i32 %6) #1 %28 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 1, i32 0, i32 %6) #1 %29 = call nsz float @llvm.amdgcn.interp.p2(float %28, float %25, i32 1, i32 0, i32 %6) #1 %30 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 2, i32 0, i32 %6) #1 %31 = call nsz float @llvm.amdgcn.interp.p2(float %30, float %25, i32 2, i32 0, i32 %6) #1 %32 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 3, i32 0, i32 %6) #1 %33 = call nsz float @llvm.amdgcn.interp.p2(float %32, float %25, i32 3, i32 0, i32 %6) #1 %34 = bitcast <2 x i32> %8 to <2 x float> %35 = extractelement <2 x float> %34, i32 0 %36 = extractelement <2 x float> %34, i32 1 %37 = call nsz float @llvm.amdgcn.interp.p1(float %35, i32 0, i32 1, i32 %6) #1 %38 = call nsz float @llvm.amdgcn.interp.p2(float %37, float %36, i32 0, i32 1, i32 %6) #1 %39 = call nsz float @llvm.amdgcn.interp.p1(float %35, i32 1, i32 1, i32 %6) #1 %40 = call nsz float @llvm.amdgcn.interp.p2(float %39, float %36, i32 1, i32 1, i32 %6) #1 %41 = call nsz float @llvm.amdgcn.interp.p1(float %35, i32 2, i32 1, i32 %6) #1 %42 = call nsz float @llvm.amdgcn.interp.p2(float %41, float %36, i32 2, i32 1, i32 %6) #1 %43 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !invariant.load !0 %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 3, !amdgpu.uniform !0 %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !invariant.load !0 %48 = bitcast float %38 to i32 %49 = bitcast float %40 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = bitcast <2 x i32> %51 to <2 x float> %53 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %52, <8 x i32> %44, <4 x i32> %47, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul nsz float %27, %54 %59 = fmul nsz float %29, %55 %60 = fmul nsz float %31, %56 %61 = fmul nsz float %33, %57 %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !invariant.load !0 %64 = call nsz float @llvm.SI.load.const(<16 x i8> %63, i32 32) %65 = fcmp nsz oge float %64, %61 %66 = select i1 %65, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %66) %67 = call nsz float @llvm.AMDGPU.clamp.(float %42, float 0.000000e+00, float 1.000000e+00) %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !invariant.load !0 %70 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 16) %71 = fsub nsz float 1.000000e+00, %67 %72 = fmul nsz float %70, %67 %73 = fmul nsz float %58, %71 %74 = fadd nsz float %72, %73 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !invariant.load !0 %77 = call nsz float @llvm.SI.load.const(<16 x i8> %76, i32 20) %78 = fsub nsz float 1.000000e+00, %67 %79 = fmul nsz float %77, %67 %80 = fmul nsz float %59, %78 %81 = fadd nsz float %79, %80 %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !invariant.load !0 %84 = call nsz float @llvm.SI.load.const(<16 x i8> %83, i32 24) %85 = fsub nsz float 1.000000e+00, %67 %86 = fmul nsz float %84, %67 %87 = fmul nsz float %60, %85 %88 = fadd nsz float %86, %87 %89 = bitcast float %5 to i32 %90 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %89, 10 %91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %90, float %74, 11 %92 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91, float %81, 12 %93 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %92, float %88, 13 %94 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %93, float %61, 14 %95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %94, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95 } ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind declare void @llvm.AMDGPU.kill(float) #3 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } attributes #3 = { nounwind } !0 = !{} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].xy, GENERIC[1] DCL CONST[0..24] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {0, 1, 0, 0} 0: MUL TEMP[0], CONST[16], IN[0].xxxx 1: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0] 3: ADD OUT[0], TEMP[0], CONST[19] 4: MOV OUT[1].x, IMM[0].xxxx 5: MUL TEMP[0], CONST[8], IN[1].xxxx 6: MAD TEMP[0], CONST[9], IN[1].yyyy, TEMP[0] 7: MAD TEMP[0].xyz, CONST[10], IN[1].zzzz, TEMP[0] 8: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 9: RSQ TEMP[1].x, TEMP[1].xxxx 10: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 11: MOV TEMP[0].xyz, -TEMP[0].xyzx 12: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[21].xyzz 13: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 14: MUL TEMP[1], TEMP[2].xxxx, CONST[22] 15: MUL TEMP[2], CONST[8], IN[0].xxxx 16: MAD TEMP[2], CONST[9], IN[0].yyyy, TEMP[2] 17: MAD TEMP[2], CONST[10], IN[0].zzzz, TEMP[2] 18: ADD TEMP[2].xyz, TEMP[2], CONST[11] 19: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[23].xyzz 20: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 21: SQRT TEMP[3].x, TEMP[3].xxxx 22: RCP TEMP[4].x, TEMP[3].xxxx 23: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 24: FSLT TEMP[3].x, TEMP[3].xxxx, CONST[23].wwww 25: UCMP TEMP[4].x, TEMP[3].xxxx, IMM[0].xxxx, TEMP[4].xxxx 26: NOT TEMP[3].x, TEMP[3].xxxx 27: UCMP TEMP[4].x, TEMP[3].xxxx, IMM[0].yyyy, TEMP[4].xxxx 28: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[2].xyzz 29: MAX TEMP[0].x, IMM[0].yyyy, TEMP[0].xxxx 30: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 31: MAD TEMP[1], TEMP[0].xxxx, CONST[24], TEMP[1] 32: MUL TEMP[1], TEMP[1], IN[3] 33: ADD TEMP[1], TEMP[1], CONST[20] 34: MIN TEMP[0].xyz, IMM[0].xxxx, TEMP[1] 35: MOV TEMP[1].xyz, TEMP[0].xyzx 36: MOV TEMP[1].w, IN[3].wwww 37: MOV OUT[2], TEMP[1] 38: MOV OUT[3].xy, IN[2].xyxx 39: END radeonsi: Compiling shader 14 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %18 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %19 = load <4 x i32>, <4 x i32> addrspace(2)* %18, align 16, !invariant.load !0 %20 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %19, i32 %14, i32 0, i1 false, i1 false) #2 %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %25 = bitcast <16 x i8> addrspace(2)* %24 to <4 x i32> addrspace(2)* %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %15, i32 0, i1 false, i1 false) #2 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %32 = bitcast <16 x i8> addrspace(2)* %31 to <4 x i32> addrspace(2)* %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0 %34 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %33, i32 %16, i32 0, i1 false, i1 false) #2 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %38 = bitcast <16 x i8> addrspace(2)* %37 to <4 x i32> addrspace(2)* %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !invariant.load !0 %40 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %39, i32 %17, i32 0, i1 false, i1 false) #2 %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 256) %48 = fmul nsz float %47, %21 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 260) %50 = fmul nsz float %49, %21 %51 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 264) %52 = fmul nsz float %51, %21 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 268) %56 = fmul nsz float %55, %21 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 272) %58 = fmul nsz float %57, %22 %59 = fadd nsz float %58, %48 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !invariant.load !0 %62 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 276) %63 = fmul nsz float %62, %22 %64 = fadd nsz float %63, %50 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 280) %66 = fmul nsz float %65, %22 %67 = fadd nsz float %66, %52 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !invariant.load !0 %70 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 284) %71 = fmul nsz float %70, %22 %72 = fadd nsz float %71, %56 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 288) %74 = fmul nsz float %73, %23 %75 = fadd nsz float %74, %59 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !invariant.load !0 %78 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 292) %79 = fmul nsz float %78, %23 %80 = fadd nsz float %79, %64 %81 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 296) %82 = fmul nsz float %81, %23 %83 = fadd nsz float %82, %67 %84 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !invariant.load !0 %86 = call nsz float @llvm.SI.load.const(<16 x i8> %85, i32 300) %87 = fmul nsz float %86, %23 %88 = fadd nsz float %87, %72 %89 = call nsz float @llvm.SI.load.const(<16 x i8> %85, i32 304) %90 = fadd nsz float %75, %89 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 308) %94 = fadd nsz float %80, %93 %95 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 312) %96 = fadd nsz float %83, %95 %97 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 316) %98 = fadd nsz float %88, %97 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !invariant.load !0 %101 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 128) %102 = fmul nsz float %101, %28 %103 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 132) %104 = fmul nsz float %103, %28 %105 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 136) %106 = fmul nsz float %105, %28 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !invariant.load !0 %109 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 144) %110 = fmul nsz float %109, %29 %111 = fadd nsz float %110, %102 %112 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 148) %113 = fmul nsz float %112, %29 %114 = fadd nsz float %113, %104 %115 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !invariant.load !0 %117 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 152) %118 = fmul nsz float %117, %29 %119 = fadd nsz float %118, %106 %120 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 160) %121 = fmul nsz float %120, %30 %122 = fadd nsz float %121, %111 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 164) %126 = fmul nsz float %125, %30 %127 = fadd nsz float %126, %114 %128 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 168) %129 = fmul nsz float %128, %30 %130 = fadd nsz float %129, %119 %131 = fmul nsz float %122, %122 %132 = fmul nsz float %127, %127 %133 = fadd nsz float %132, %131 %134 = fmul nsz float %130, %130 %135 = fadd nsz float %133, %134 %136 = call nsz float @llvm.sqrt.f32(float %135) #2 %137 = fdiv nsz float 1.000000e+00, %136, !fpmath !1 %138 = fmul nsz float %122, %137 %139 = fmul nsz float %127, %137 %140 = fmul nsz float %130, %137 %141 = fsub nsz float -0.000000e+00, %138 %142 = fsub nsz float -0.000000e+00, %139 %143 = fsub nsz float -0.000000e+00, %140 %144 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, align 16, !invariant.load !0 %146 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 336) %147 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 340) %148 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 344) %149 = fmul nsz float %146, %141 %150 = fmul nsz float %147, %142 %151 = fadd nsz float %150, %149 %152 = fmul nsz float %148, %143 %153 = fadd nsz float %151, %152 %154 = call nsz float @llvm.maxnum.f32(float %153, float 0.000000e+00) #2 %155 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %156 = load <16 x i8>, <16 x i8> addrspace(2)* %155, align 16, !invariant.load !0 %157 = call nsz float @llvm.SI.load.const(<16 x i8> %156, i32 352) %158 = fmul nsz float %154, %157 %159 = call nsz float @llvm.SI.load.const(<16 x i8> %156, i32 356) %160 = fmul nsz float %154, %159 %161 = call nsz float @llvm.SI.load.const(<16 x i8> %156, i32 360) %162 = fmul nsz float %154, %161 %163 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %164 = load <16 x i8>, <16 x i8> addrspace(2)* %163, align 16, !invariant.load !0 %165 = call nsz float @llvm.SI.load.const(<16 x i8> %164, i32 128) %166 = fmul nsz float %165, %21 %167 = call nsz float @llvm.SI.load.const(<16 x i8> %164, i32 132) %168 = fmul nsz float %167, %21 %169 = call nsz float @llvm.SI.load.const(<16 x i8> %164, i32 136) %170 = fmul nsz float %169, %21 %171 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %172 = load <16 x i8>, <16 x i8> addrspace(2)* %171, align 16, !invariant.load !0 %173 = call nsz float @llvm.SI.load.const(<16 x i8> %172, i32 144) %174 = fmul nsz float %173, %22 %175 = fadd nsz float %174, %166 %176 = call nsz float @llvm.SI.load.const(<16 x i8> %172, i32 148) %177 = fmul nsz float %176, %22 %178 = fadd nsz float %177, %168 %179 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %180 = load <16 x i8>, <16 x i8> addrspace(2)* %179, align 16, !invariant.load !0 %181 = call nsz float @llvm.SI.load.const(<16 x i8> %180, i32 152) %182 = fmul nsz float %181, %22 %183 = fadd nsz float %182, %170 %184 = call nsz float @llvm.SI.load.const(<16 x i8> %180, i32 160) %185 = fmul nsz float %184, %23 %186 = fadd nsz float %185, %175 %187 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %188 = load <16 x i8>, <16 x i8> addrspace(2)* %187, align 16, !invariant.load !0 %189 = call nsz float @llvm.SI.load.const(<16 x i8> %188, i32 164) %190 = fmul nsz float %189, %23 %191 = fadd nsz float %190, %178 %192 = call nsz float @llvm.SI.load.const(<16 x i8> %188, i32 168) %193 = fmul nsz float %192, %23 %194 = fadd nsz float %193, %183 %195 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %196 = load <16 x i8>, <16 x i8> addrspace(2)* %195, align 16, !invariant.load !0 %197 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 176) %198 = fadd nsz float %186, %197 %199 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 180) %200 = fadd nsz float %191, %199 %201 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 184) %202 = fadd nsz float %194, %201 %203 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %204 = load <16 x i8>, <16 x i8> addrspace(2)* %203, align 16, !invariant.load !0 %205 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 368) %206 = fsub nsz float %198, %205 %207 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 372) %208 = fsub nsz float %200, %207 %209 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 376) %210 = fsub nsz float %202, %209 %211 = fmul nsz float %206, %206 %212 = fmul nsz float %208, %208 %213 = fadd nsz float %212, %211 %214 = fmul nsz float %210, %210 %215 = fadd nsz float %213, %214 %216 = call nsz float @llvm.sqrt.f32(float %215) #2 %217 = fdiv nsz float 1.000000e+00, %216, !fpmath !1 %218 = fmul nsz float %206, %217 %219 = fmul nsz float %208, %217 %220 = fmul nsz float %210, %217 %221 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %222 = load <16 x i8>, <16 x i8> addrspace(2)* %221, align 16, !invariant.load !0 %223 = call nsz float @llvm.SI.load.const(<16 x i8> %222, i32 380) %224 = fcmp nsz olt float %216, %223 %225 = select i1 %224, float 1.000000e+00, float 0.000000e+00 %226 = fmul nsz float %218, %141 %227 = fmul nsz float %219, %142 %228 = fadd nsz float %227, %226 %229 = fmul nsz float %220, %143 %230 = fadd nsz float %228, %229 %231 = call nsz float @llvm.maxnum.f32(float %230, float 0.000000e+00) #2 %232 = fmul nsz float %231, %225 %233 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %234 = load <16 x i8>, <16 x i8> addrspace(2)* %233, align 16, !invariant.load !0 %235 = call nsz float @llvm.SI.load.const(<16 x i8> %234, i32 384) %236 = fmul nsz float %232, %235 %237 = fadd nsz float %236, %158 %238 = call nsz float @llvm.SI.load.const(<16 x i8> %234, i32 388) %239 = fmul nsz float %232, %238 %240 = fadd nsz float %239, %160 %241 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %242 = load <16 x i8>, <16 x i8> addrspace(2)* %241, align 16, !invariant.load !0 %243 = call nsz float @llvm.SI.load.const(<16 x i8> %242, i32 392) %244 = fmul nsz float %232, %243 %245 = fadd nsz float %244, %162 %246 = fmul nsz float %237, %41 %247 = fmul nsz float %240, %42 %248 = fmul nsz float %245, %43 %249 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %250 = load <16 x i8>, <16 x i8> addrspace(2)* %249, align 16, !invariant.load !0 %251 = call nsz float @llvm.SI.load.const(<16 x i8> %250, i32 320) %252 = fadd nsz float %246, %251 %253 = call nsz float @llvm.SI.load.const(<16 x i8> %250, i32 324) %254 = fadd nsz float %247, %253 %255 = call nsz float @llvm.SI.load.const(<16 x i8> %250, i32 328) %256 = fadd nsz float %248, %255 %257 = call nsz float @llvm.minnum.f32(float %252, float 1.000000e+00) #2 %258 = call nsz float @llvm.minnum.f32(float %254, float 1.000000e+00) #2 %259 = call nsz float @llvm.minnum.f32(float %256, float 1.000000e+00) #2 %260 = bitcast i32 %12 to float %261 = insertvalue <{ float, float, float }> undef, float %260, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %257, float %258, float %259, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %35, float %36, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %90, float %94, float %96, float %98) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %261 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} !1 = !{float 2.500000e+00} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].xyz, GENERIC[1] DCL CONST[0..26] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {0, 1, 0, 0} 0: MUL TEMP[0], CONST[12], IN[0].xxxx 1: MAD TEMP[0], CONST[13], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[14], IN[0].zzzz, TEMP[0] 3: ADD TEMP[0].z, TEMP[0], CONST[15] 4: ADD TEMP[0].x, TEMP[0].zzzz, -CONST[20].xxxx 5: MUL TEMP[0].x, TEMP[0].xxxx, CONST[21].xxxx 6: MUL TEMP[1], CONST[16], IN[0].xxxx 7: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 9: ADD OUT[0], TEMP[1], CONST[19] 10: MOV OUT[1].x, IMM[0].xxxx 11: MUL TEMP[1], CONST[8], IN[1].xxxx 12: MAD TEMP[1], CONST[9], IN[1].yyyy, TEMP[1] 13: MAD TEMP[1].xyz, CONST[10], IN[1].zzzz, TEMP[1] 14: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 15: RSQ TEMP[2].x, TEMP[2].xxxx 16: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 17: MOV TEMP[1].xyz, -TEMP[1].xyzx 18: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[23].xyzz 19: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 20: MUL TEMP[2], TEMP[3].xxxx, CONST[24] 21: MUL TEMP[3], CONST[8], IN[0].xxxx 22: MAD TEMP[3], CONST[9], IN[0].yyyy, TEMP[3] 23: MAD TEMP[3], CONST[10], IN[0].zzzz, TEMP[3] 24: ADD TEMP[3].xyz, TEMP[3], CONST[11] 25: ADD TEMP[3].xyz, TEMP[3].xyzz, -CONST[25].xyzz 26: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 27: SQRT TEMP[4].x, TEMP[4].xxxx 28: RCP TEMP[5].x, TEMP[4].xxxx 29: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx 30: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[25].wwww 31: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[5].xxxx 32: NOT TEMP[4].x, TEMP[4].xxxx 33: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy, TEMP[5].xxxx 34: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[3].xyzz 35: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 36: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx 37: MAD TEMP[2], TEMP[1].xxxx, CONST[26], TEMP[2] 38: MUL TEMP[2], TEMP[2], IN[3] 39: ADD TEMP[2], TEMP[2], CONST[22] 40: MIN TEMP[1].xyz, IMM[0].xxxx, TEMP[2] 41: MOV TEMP[2].xyz, TEMP[1].xyzx 42: MOV TEMP[2].w, IN[3].wwww 43: MOV OUT[2], TEMP[2] 44: MOV OUT[3].z, TEMP[0].xxxx 45: MOV OUT[3].xy, IN[2].xyxx 46: END radeonsi: Compiling shader 15 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %18 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %19 = load <4 x i32>, <4 x i32> addrspace(2)* %18, align 16, !invariant.load !0 %20 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %19, i32 %14, i32 0, i1 false, i1 false) #2 %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %25 = bitcast <16 x i8> addrspace(2)* %24 to <4 x i32> addrspace(2)* %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %15, i32 0, i1 false, i1 false) #2 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %32 = bitcast <16 x i8> addrspace(2)* %31 to <4 x i32> addrspace(2)* %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0 %34 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %33, i32 %16, i32 0, i1 false, i1 false) #2 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %38 = bitcast <16 x i8> addrspace(2)* %37 to <4 x i32> addrspace(2)* %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !invariant.load !0 %40 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %39, i32 %17, i32 0, i1 false, i1 false) #2 %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 200) %48 = fmul nsz float %47, %21 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 216) %50 = fmul nsz float %49, %22 %51 = fadd nsz float %50, %48 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !invariant.load !0 %54 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 232) %55 = fmul nsz float %54, %23 %56 = fadd nsz float %55, %51 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 248) %58 = fadd nsz float %56, %57 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !invariant.load !0 %61 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 320) %62 = fsub nsz float %58, %61 %63 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 336) %64 = fmul nsz float %62, %63 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 256) %66 = fmul nsz float %65, %21 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !invariant.load !0 %69 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 260) %70 = fmul nsz float %69, %21 %71 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 264) %72 = fmul nsz float %71, %21 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 268) %74 = fmul nsz float %73, %21 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !invariant.load !0 %77 = call nsz float @llvm.SI.load.const(<16 x i8> %76, i32 272) %78 = fmul nsz float %77, %22 %79 = fadd nsz float %78, %66 %80 = call nsz float @llvm.SI.load.const(<16 x i8> %76, i32 276) %81 = fmul nsz float %80, %22 %82 = fadd nsz float %81, %70 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !invariant.load !0 %85 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 280) %86 = fmul nsz float %85, %22 %87 = fadd nsz float %86, %72 %88 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 284) %89 = fmul nsz float %88, %22 %90 = fadd nsz float %89, %74 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 288) %94 = fmul nsz float %93, %23 %95 = fadd nsz float %94, %79 %96 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 292) %97 = fmul nsz float %96, %23 %98 = fadd nsz float %97, %82 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !invariant.load !0 %101 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 296) %102 = fmul nsz float %101, %23 %103 = fadd nsz float %102, %87 %104 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 300) %105 = fmul nsz float %104, %23 %106 = fadd nsz float %105, %90 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !invariant.load !0 %109 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 304) %110 = fadd nsz float %95, %109 %111 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 308) %112 = fadd nsz float %98, %111 %113 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 312) %114 = fadd nsz float %103, %113 %115 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !invariant.load !0 %117 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 316) %118 = fadd nsz float %106, %117 %119 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 128) %120 = fmul nsz float %119, %28 %121 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 132) %122 = fmul nsz float %121, %28 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 136) %126 = fmul nsz float %125, %28 %127 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 144) %128 = fmul nsz float %127, %29 %129 = fadd nsz float %128, %120 %130 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %131 = load <16 x i8>, <16 x i8> addrspace(2)* %130, align 16, !invariant.load !0 %132 = call nsz float @llvm.SI.load.const(<16 x i8> %131, i32 148) %133 = fmul nsz float %132, %29 %134 = fadd nsz float %133, %122 %135 = call nsz float @llvm.SI.load.const(<16 x i8> %131, i32 152) %136 = fmul nsz float %135, %29 %137 = fadd nsz float %136, %126 %138 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %139 = load <16 x i8>, <16 x i8> addrspace(2)* %138, align 16, !invariant.load !0 %140 = call nsz float @llvm.SI.load.const(<16 x i8> %139, i32 160) %141 = fmul nsz float %140, %30 %142 = fadd nsz float %141, %129 %143 = call nsz float @llvm.SI.load.const(<16 x i8> %139, i32 164) %144 = fmul nsz float %143, %30 %145 = fadd nsz float %144, %134 %146 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %147 = load <16 x i8>, <16 x i8> addrspace(2)* %146, align 16, !invariant.load !0 %148 = call nsz float @llvm.SI.load.const(<16 x i8> %147, i32 168) %149 = fmul nsz float %148, %30 %150 = fadd nsz float %149, %137 %151 = fmul nsz float %142, %142 %152 = fmul nsz float %145, %145 %153 = fadd nsz float %152, %151 %154 = fmul nsz float %150, %150 %155 = fadd nsz float %153, %154 %156 = call nsz float @llvm.sqrt.f32(float %155) #2 %157 = fdiv nsz float 1.000000e+00, %156, !fpmath !1 %158 = fmul nsz float %142, %157 %159 = fmul nsz float %145, %157 %160 = fmul nsz float %150, %157 %161 = fsub nsz float -0.000000e+00, %158 %162 = fsub nsz float -0.000000e+00, %159 %163 = fsub nsz float -0.000000e+00, %160 %164 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %165 = load <16 x i8>, <16 x i8> addrspace(2)* %164, align 16, !invariant.load !0 %166 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 368) %167 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 372) %168 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 376) %169 = fmul nsz float %166, %161 %170 = fmul nsz float %167, %162 %171 = fadd nsz float %170, %169 %172 = fmul nsz float %168, %163 %173 = fadd nsz float %171, %172 %174 = call nsz float @llvm.maxnum.f32(float %173, float 0.000000e+00) #2 %175 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %176 = load <16 x i8>, <16 x i8> addrspace(2)* %175, align 16, !invariant.load !0 %177 = call nsz float @llvm.SI.load.const(<16 x i8> %176, i32 384) %178 = fmul nsz float %174, %177 %179 = call nsz float @llvm.SI.load.const(<16 x i8> %176, i32 388) %180 = fmul nsz float %174, %179 %181 = call nsz float @llvm.SI.load.const(<16 x i8> %176, i32 392) %182 = fmul nsz float %174, %181 %183 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %184 = load <16 x i8>, <16 x i8> addrspace(2)* %183, align 16, !invariant.load !0 %185 = call nsz float @llvm.SI.load.const(<16 x i8> %184, i32 128) %186 = fmul nsz float %185, %21 %187 = call nsz float @llvm.SI.load.const(<16 x i8> %184, i32 132) %188 = fmul nsz float %187, %21 %189 = call nsz float @llvm.SI.load.const(<16 x i8> %184, i32 136) %190 = fmul nsz float %189, %21 %191 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %192 = load <16 x i8>, <16 x i8> addrspace(2)* %191, align 16, !invariant.load !0 %193 = call nsz float @llvm.SI.load.const(<16 x i8> %192, i32 144) %194 = fmul nsz float %193, %22 %195 = fadd nsz float %194, %186 %196 = call nsz float @llvm.SI.load.const(<16 x i8> %192, i32 148) %197 = fmul nsz float %196, %22 %198 = fadd nsz float %197, %188 %199 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %200 = load <16 x i8>, <16 x i8> addrspace(2)* %199, align 16, !invariant.load !0 %201 = call nsz float @llvm.SI.load.const(<16 x i8> %200, i32 152) %202 = fmul nsz float %201, %22 %203 = fadd nsz float %202, %190 %204 = call nsz float @llvm.SI.load.const(<16 x i8> %200, i32 160) %205 = fmul nsz float %204, %23 %206 = fadd nsz float %205, %195 %207 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %208 = load <16 x i8>, <16 x i8> addrspace(2)* %207, align 16, !invariant.load !0 %209 = call nsz float @llvm.SI.load.const(<16 x i8> %208, i32 164) %210 = fmul nsz float %209, %23 %211 = fadd nsz float %210, %198 %212 = call nsz float @llvm.SI.load.const(<16 x i8> %208, i32 168) %213 = fmul nsz float %212, %23 %214 = fadd nsz float %213, %203 %215 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %216 = load <16 x i8>, <16 x i8> addrspace(2)* %215, align 16, !invariant.load !0 %217 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 176) %218 = fadd nsz float %206, %217 %219 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 180) %220 = fadd nsz float %211, %219 %221 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 184) %222 = fadd nsz float %214, %221 %223 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %224 = load <16 x i8>, <16 x i8> addrspace(2)* %223, align 16, !invariant.load !0 %225 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 400) %226 = fsub nsz float %218, %225 %227 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 404) %228 = fsub nsz float %220, %227 %229 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 408) %230 = fsub nsz float %222, %229 %231 = fmul nsz float %226, %226 %232 = fmul nsz float %228, %228 %233 = fadd nsz float %232, %231 %234 = fmul nsz float %230, %230 %235 = fadd nsz float %233, %234 %236 = call nsz float @llvm.sqrt.f32(float %235) #2 %237 = fdiv nsz float 1.000000e+00, %236, !fpmath !1 %238 = fmul nsz float %226, %237 %239 = fmul nsz float %228, %237 %240 = fmul nsz float %230, %237 %241 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %242 = load <16 x i8>, <16 x i8> addrspace(2)* %241, align 16, !invariant.load !0 %243 = call nsz float @llvm.SI.load.const(<16 x i8> %242, i32 412) %244 = fcmp nsz olt float %236, %243 %245 = select i1 %244, float 1.000000e+00, float 0.000000e+00 %246 = fmul nsz float %238, %161 %247 = fmul nsz float %239, %162 %248 = fadd nsz float %247, %246 %249 = fmul nsz float %240, %163 %250 = fadd nsz float %248, %249 %251 = call nsz float @llvm.maxnum.f32(float %250, float 0.000000e+00) #2 %252 = fmul nsz float %251, %245 %253 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %254 = load <16 x i8>, <16 x i8> addrspace(2)* %253, align 16, !invariant.load !0 %255 = call nsz float @llvm.SI.load.const(<16 x i8> %254, i32 416) %256 = fmul nsz float %252, %255 %257 = fadd nsz float %256, %178 %258 = call nsz float @llvm.SI.load.const(<16 x i8> %254, i32 420) %259 = fmul nsz float %252, %258 %260 = fadd nsz float %259, %180 %261 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %262 = load <16 x i8>, <16 x i8> addrspace(2)* %261, align 16, !invariant.load !0 %263 = call nsz float @llvm.SI.load.const(<16 x i8> %262, i32 424) %264 = fmul nsz float %252, %263 %265 = fadd nsz float %264, %182 %266 = fmul nsz float %257, %41 %267 = fmul nsz float %260, %42 %268 = fmul nsz float %265, %43 %269 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %270 = load <16 x i8>, <16 x i8> addrspace(2)* %269, align 16, !invariant.load !0 %271 = call nsz float @llvm.SI.load.const(<16 x i8> %270, i32 352) %272 = fadd nsz float %266, %271 %273 = call nsz float @llvm.SI.load.const(<16 x i8> %270, i32 356) %274 = fadd nsz float %267, %273 %275 = call nsz float @llvm.SI.load.const(<16 x i8> %270, i32 360) %276 = fadd nsz float %268, %275 %277 = call nsz float @llvm.minnum.f32(float %272, float 1.000000e+00) #2 %278 = call nsz float @llvm.minnum.f32(float %274, float 1.000000e+00) #2 %279 = call nsz float @llvm.minnum.f32(float %276, float 1.000000e+00) #2 %280 = bitcast i32 %12 to float %281 = insertvalue <{ float, float, float }> undef, float %280, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %277, float %278, float %279, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %35, float %36, float %64, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %110, float %112, float %114, float %118) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %281 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} !1 = !{float 2.500000e+00} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].xy, GENERIC[1] DCL CONST[0..28] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {0, 1, 2, 0} 0: MUL TEMP[0], CONST[16], IN[0].xxxx 1: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0] 3: ADD OUT[0], TEMP[0], CONST[19] 4: MOV OUT[1].x, IMM[0].xxxx 5: MUL TEMP[0], CONST[8], IN[1].xxxx 6: MAD TEMP[0], CONST[9], IN[1].yyyy, TEMP[0] 7: MAD TEMP[0].xyz, CONST[10], IN[1].zzzz, TEMP[0] 8: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 9: RSQ TEMP[1].x, TEMP[1].xxxx 10: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 11: MOV TEMP[0].xyz, -TEMP[0].xyzx 12: MUL TEMP[1], CONST[8], IN[0].xxxx 13: MAD TEMP[1], CONST[9], IN[0].yyyy, TEMP[1] 14: MAD TEMP[1], CONST[10], IN[0].zzzz, TEMP[1] 15: ADD TEMP[1].xyz, TEMP[1], CONST[11] 16: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[21].xyzz 17: MAX TEMP[3].x, IMM[0].yyyy, TEMP[2].xxxx 18: MUL TEMP[2], TEMP[3].xxxx, CONST[23] 19: DP3 TEMP[3].x, TEMP[0].xyzz, CONST[22].xyzz 20: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 21: MAD TEMP[2], TEMP[3].xxxx, CONST[24], TEMP[2] 22: ADD TEMP[3].xyz, TEMP[1].xyzz, -CONST[25].xyzz 23: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 24: SQRT TEMP[4].x, TEMP[4].xxxx 25: RCP TEMP[5].x, TEMP[4].xxxx 26: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx 27: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[25].wwww 28: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[5].xxxx 29: NOT TEMP[4].x, TEMP[4].xxxx 30: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy, TEMP[5].xxxx 31: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[3].xyzz 32: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 33: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx 34: MAD TEMP[2], TEMP[3].xxxx, CONST[27], TEMP[2] 35: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[26].xyzz 36: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz 37: SQRT TEMP[3].x, TEMP[3].xxxx 38: RCP TEMP[4].x, TEMP[3].xxxx 39: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx 40: FSLT TEMP[3].x, TEMP[3].xxxx, CONST[26].wwww 41: UCMP TEMP[4].x, TEMP[3].xxxx, IMM[0].xxxx, TEMP[4].xxxx 42: NOT TEMP[3].x, TEMP[3].xxxx 43: UCMP TEMP[4].x, TEMP[3].xxxx, IMM[0].yyyy, TEMP[4].xxxx 44: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[1].xyzz 45: MAX TEMP[0].x, IMM[0].yyyy, TEMP[0].xxxx 46: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 47: MAD TEMP[2], TEMP[0].xxxx, CONST[28], TEMP[2] 48: MUL TEMP[2], TEMP[2], IN[3] 49: ADD TEMP[2], TEMP[2], CONST[20] 50: MIN TEMP[0].xyz, IMM[0].xxxx, TEMP[2] 51: MOV TEMP[2].xyz, TEMP[0].xyzx 52: MOV TEMP[2].w, IN[3].wwww 53: MOV OUT[2], TEMP[2] 54: MOV OUT[3].xy, IN[2].xyxx 55: END radeonsi: Compiling shader 16 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %18 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %19 = load <4 x i32>, <4 x i32> addrspace(2)* %18, align 16, !invariant.load !0 %20 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %19, i32 %14, i32 0, i1 false, i1 false) #2 %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %25 = bitcast <16 x i8> addrspace(2)* %24 to <4 x i32> addrspace(2)* %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %15, i32 0, i1 false, i1 false) #2 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %32 = bitcast <16 x i8> addrspace(2)* %31 to <4 x i32> addrspace(2)* %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0 %34 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %33, i32 %16, i32 0, i1 false, i1 false) #2 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %38 = bitcast <16 x i8> addrspace(2)* %37 to <4 x i32> addrspace(2)* %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !invariant.load !0 %40 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %39, i32 %17, i32 0, i1 false, i1 false) #2 %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 256) %48 = fmul nsz float %47, %21 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 260) %50 = fmul nsz float %49, %21 %51 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 264) %52 = fmul nsz float %51, %21 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 268) %56 = fmul nsz float %55, %21 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 272) %58 = fmul nsz float %57, %22 %59 = fadd nsz float %58, %48 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !invariant.load !0 %62 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 276) %63 = fmul nsz float %62, %22 %64 = fadd nsz float %63, %50 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 280) %66 = fmul nsz float %65, %22 %67 = fadd nsz float %66, %52 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !invariant.load !0 %70 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 284) %71 = fmul nsz float %70, %22 %72 = fadd nsz float %71, %56 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 288) %74 = fmul nsz float %73, %23 %75 = fadd nsz float %74, %59 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !invariant.load !0 %78 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 292) %79 = fmul nsz float %78, %23 %80 = fadd nsz float %79, %64 %81 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 296) %82 = fmul nsz float %81, %23 %83 = fadd nsz float %82, %67 %84 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !invariant.load !0 %86 = call nsz float @llvm.SI.load.const(<16 x i8> %85, i32 300) %87 = fmul nsz float %86, %23 %88 = fadd nsz float %87, %72 %89 = call nsz float @llvm.SI.load.const(<16 x i8> %85, i32 304) %90 = fadd nsz float %75, %89 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 308) %94 = fadd nsz float %80, %93 %95 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 312) %96 = fadd nsz float %83, %95 %97 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 316) %98 = fadd nsz float %88, %97 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !invariant.load !0 %101 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 128) %102 = fmul nsz float %101, %28 %103 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 132) %104 = fmul nsz float %103, %28 %105 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 136) %106 = fmul nsz float %105, %28 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !invariant.load !0 %109 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 144) %110 = fmul nsz float %109, %29 %111 = fadd nsz float %110, %102 %112 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 148) %113 = fmul nsz float %112, %29 %114 = fadd nsz float %113, %104 %115 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !invariant.load !0 %117 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 152) %118 = fmul nsz float %117, %29 %119 = fadd nsz float %118, %106 %120 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 160) %121 = fmul nsz float %120, %30 %122 = fadd nsz float %121, %111 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 164) %126 = fmul nsz float %125, %30 %127 = fadd nsz float %126, %114 %128 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 168) %129 = fmul nsz float %128, %30 %130 = fadd nsz float %129, %119 %131 = fmul nsz float %122, %122 %132 = fmul nsz float %127, %127 %133 = fadd nsz float %132, %131 %134 = fmul nsz float %130, %130 %135 = fadd nsz float %133, %134 %136 = call nsz float @llvm.sqrt.f32(float %135) #2 %137 = fdiv nsz float 1.000000e+00, %136, !fpmath !1 %138 = fmul nsz float %122, %137 %139 = fmul nsz float %127, %137 %140 = fmul nsz float %130, %137 %141 = fsub nsz float -0.000000e+00, %138 %142 = fsub nsz float -0.000000e+00, %139 %143 = fsub nsz float -0.000000e+00, %140 %144 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, align 16, !invariant.load !0 %146 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 128) %147 = fmul nsz float %146, %21 %148 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 132) %149 = fmul nsz float %148, %21 %150 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 136) %151 = fmul nsz float %150, %21 %152 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, align 16, !invariant.load !0 %154 = call nsz float @llvm.SI.load.const(<16 x i8> %153, i32 144) %155 = fmul nsz float %154, %22 %156 = fadd nsz float %155, %147 %157 = call nsz float @llvm.SI.load.const(<16 x i8> %153, i32 148) %158 = fmul nsz float %157, %22 %159 = fadd nsz float %158, %149 %160 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %161 = load <16 x i8>, <16 x i8> addrspace(2)* %160, align 16, !invariant.load !0 %162 = call nsz float @llvm.SI.load.const(<16 x i8> %161, i32 152) %163 = fmul nsz float %162, %22 %164 = fadd nsz float %163, %151 %165 = call nsz float @llvm.SI.load.const(<16 x i8> %161, i32 160) %166 = fmul nsz float %165, %23 %167 = fadd nsz float %166, %156 %168 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %169 = load <16 x i8>, <16 x i8> addrspace(2)* %168, align 16, !invariant.load !0 %170 = call nsz float @llvm.SI.load.const(<16 x i8> %169, i32 164) %171 = fmul nsz float %170, %23 %172 = fadd nsz float %171, %159 %173 = call nsz float @llvm.SI.load.const(<16 x i8> %169, i32 168) %174 = fmul nsz float %173, %23 %175 = fadd nsz float %174, %164 %176 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %177 = load <16 x i8>, <16 x i8> addrspace(2)* %176, align 16, !invariant.load !0 %178 = call nsz float @llvm.SI.load.const(<16 x i8> %177, i32 176) %179 = fadd nsz float %167, %178 %180 = call nsz float @llvm.SI.load.const(<16 x i8> %177, i32 180) %181 = fadd nsz float %172, %180 %182 = call nsz float @llvm.SI.load.const(<16 x i8> %177, i32 184) %183 = fadd nsz float %175, %182 %184 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %185 = load <16 x i8>, <16 x i8> addrspace(2)* %184, align 16, !invariant.load !0 %186 = call nsz float @llvm.SI.load.const(<16 x i8> %185, i32 336) %187 = call nsz float @llvm.SI.load.const(<16 x i8> %185, i32 340) %188 = call nsz float @llvm.SI.load.const(<16 x i8> %185, i32 344) %189 = fmul nsz float %186, %141 %190 = fmul nsz float %187, %142 %191 = fadd nsz float %190, %189 %192 = fmul nsz float %188, %143 %193 = fadd nsz float %191, %192 %194 = call nsz float @llvm.maxnum.f32(float %193, float 0.000000e+00) #2 %195 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %196 = load <16 x i8>, <16 x i8> addrspace(2)* %195, align 16, !invariant.load !0 %197 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 368) %198 = fmul nsz float %194, %197 %199 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 372) %200 = fmul nsz float %194, %199 %201 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 376) %202 = fmul nsz float %194, %201 %203 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %204 = load <16 x i8>, <16 x i8> addrspace(2)* %203, align 16, !invariant.load !0 %205 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 352) %206 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 356) %207 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 360) %208 = fmul nsz float %205, %141 %209 = fmul nsz float %206, %142 %210 = fadd nsz float %209, %208 %211 = fmul nsz float %207, %143 %212 = fadd nsz float %210, %211 %213 = call nsz float @llvm.maxnum.f32(float %212, float 0.000000e+00) #2 %214 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %215 = load <16 x i8>, <16 x i8> addrspace(2)* %214, align 16, !invariant.load !0 %216 = call nsz float @llvm.SI.load.const(<16 x i8> %215, i32 384) %217 = fmul nsz float %213, %216 %218 = fadd nsz float %217, %198 %219 = call nsz float @llvm.SI.load.const(<16 x i8> %215, i32 388) %220 = fmul nsz float %213, %219 %221 = fadd nsz float %220, %200 %222 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %223 = load <16 x i8>, <16 x i8> addrspace(2)* %222, align 16, !invariant.load !0 %224 = call nsz float @llvm.SI.load.const(<16 x i8> %223, i32 392) %225 = fmul nsz float %213, %224 %226 = fadd nsz float %225, %202 %227 = call nsz float @llvm.SI.load.const(<16 x i8> %223, i32 400) %228 = fsub nsz float %179, %227 %229 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %230 = load <16 x i8>, <16 x i8> addrspace(2)* %229, align 16, !invariant.load !0 %231 = call nsz float @llvm.SI.load.const(<16 x i8> %230, i32 404) %232 = fsub nsz float %181, %231 %233 = call nsz float @llvm.SI.load.const(<16 x i8> %230, i32 408) %234 = fsub nsz float %183, %233 %235 = fmul nsz float %228, %228 %236 = fmul nsz float %232, %232 %237 = fadd nsz float %236, %235 %238 = fmul nsz float %234, %234 %239 = fadd nsz float %237, %238 %240 = call nsz float @llvm.sqrt.f32(float %239) #2 %241 = fdiv nsz float 1.000000e+00, %240, !fpmath !1 %242 = fmul nsz float %228, %241 %243 = fmul nsz float %232, %241 %244 = fmul nsz float %234, %241 %245 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %246 = load <16 x i8>, <16 x i8> addrspace(2)* %245, align 16, !invariant.load !0 %247 = call nsz float @llvm.SI.load.const(<16 x i8> %246, i32 412) %248 = fcmp nsz olt float %240, %247 %249 = select i1 %248, float 1.000000e+00, float 0.000000e+00 %250 = fmul nsz float %242, %141 %251 = fmul nsz float %243, %142 %252 = fadd nsz float %251, %250 %253 = fmul nsz float %244, %143 %254 = fadd nsz float %252, %253 %255 = call nsz float @llvm.maxnum.f32(float %254, float 0.000000e+00) #2 %256 = fmul nsz float %255, %249 %257 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %258 = load <16 x i8>, <16 x i8> addrspace(2)* %257, align 16, !invariant.load !0 %259 = call nsz float @llvm.SI.load.const(<16 x i8> %258, i32 432) %260 = fmul nsz float %256, %259 %261 = fadd nsz float %260, %218 %262 = call nsz float @llvm.SI.load.const(<16 x i8> %258, i32 436) %263 = fmul nsz float %256, %262 %264 = fadd nsz float %263, %221 %265 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %266 = load <16 x i8>, <16 x i8> addrspace(2)* %265, align 16, !invariant.load !0 %267 = call nsz float @llvm.SI.load.const(<16 x i8> %266, i32 440) %268 = fmul nsz float %256, %267 %269 = fadd nsz float %268, %226 %270 = call nsz float @llvm.SI.load.const(<16 x i8> %266, i32 416) %271 = fsub nsz float %179, %270 %272 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %273 = load <16 x i8>, <16 x i8> addrspace(2)* %272, align 16, !invariant.load !0 %274 = call nsz float @llvm.SI.load.const(<16 x i8> %273, i32 420) %275 = fsub nsz float %181, %274 %276 = call nsz float @llvm.SI.load.const(<16 x i8> %273, i32 424) %277 = fsub nsz float %183, %276 %278 = fmul nsz float %271, %271 %279 = fmul nsz float %275, %275 %280 = fadd nsz float %279, %278 %281 = fmul nsz float %277, %277 %282 = fadd nsz float %280, %281 %283 = call nsz float @llvm.sqrt.f32(float %282) #2 %284 = fdiv nsz float 1.000000e+00, %283, !fpmath !1 %285 = fmul nsz float %271, %284 %286 = fmul nsz float %275, %284 %287 = fmul nsz float %277, %284 %288 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %289 = load <16 x i8>, <16 x i8> addrspace(2)* %288, align 16, !invariant.load !0 %290 = call nsz float @llvm.SI.load.const(<16 x i8> %289, i32 428) %291 = fcmp nsz olt float %283, %290 %292 = select i1 %291, float 1.000000e+00, float 0.000000e+00 %293 = fmul nsz float %285, %141 %294 = fmul nsz float %286, %142 %295 = fadd nsz float %294, %293 %296 = fmul nsz float %287, %143 %297 = fadd nsz float %295, %296 %298 = call nsz float @llvm.maxnum.f32(float %297, float 0.000000e+00) #2 %299 = fmul nsz float %298, %292 %300 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %301 = load <16 x i8>, <16 x i8> addrspace(2)* %300, align 16, !invariant.load !0 %302 = call nsz float @llvm.SI.load.const(<16 x i8> %301, i32 448) %303 = fmul nsz float %299, %302 %304 = fadd nsz float %303, %261 %305 = call nsz float @llvm.SI.load.const(<16 x i8> %301, i32 452) %306 = fmul nsz float %299, %305 %307 = fadd nsz float %306, %264 %308 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %309 = load <16 x i8>, <16 x i8> addrspace(2)* %308, align 16, !invariant.load !0 %310 = call nsz float @llvm.SI.load.const(<16 x i8> %309, i32 456) %311 = fmul nsz float %299, %310 %312 = fadd nsz float %311, %269 %313 = fmul nsz float %304, %41 %314 = fmul nsz float %307, %42 %315 = fmul nsz float %312, %43 %316 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %317 = load <16 x i8>, <16 x i8> addrspace(2)* %316, align 16, !invariant.load !0 %318 = call nsz float @llvm.SI.load.const(<16 x i8> %317, i32 320) %319 = fadd nsz float %313, %318 %320 = call nsz float @llvm.SI.load.const(<16 x i8> %317, i32 324) %321 = fadd nsz float %314, %320 %322 = call nsz float @llvm.SI.load.const(<16 x i8> %317, i32 328) %323 = fadd nsz float %315, %322 %324 = call nsz float @llvm.minnum.f32(float %319, float 1.000000e+00) #2 %325 = call nsz float @llvm.minnum.f32(float %321, float 1.000000e+00) #2 %326 = call nsz float @llvm.minnum.f32(float %323, float 1.000000e+00) #2 %327 = bitcast i32 %12 to float %328 = insertvalue <{ float, float, float }> undef, float %327, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %324, float %325, float %326, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %35, float %36, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %90, float %94, float %96, float %98) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %328 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} !1 = !{float 2.500000e+00} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].xyz, GENERIC[1] DCL CONST[0..30] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {0, 1, 2, 0} 0: MUL TEMP[0], CONST[12], IN[0].xxxx 1: MAD TEMP[0], CONST[13], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[14], IN[0].zzzz, TEMP[0] 3: ADD TEMP[0].z, TEMP[0], CONST[15] 4: ADD TEMP[0].x, TEMP[0].zzzz, -CONST[20].xxxx 5: MUL TEMP[0].x, TEMP[0].xxxx, CONST[21].xxxx 6: MUL TEMP[1], CONST[16], IN[0].xxxx 7: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 9: ADD OUT[0], TEMP[1], CONST[19] 10: MOV OUT[1].x, IMM[0].xxxx 11: MUL TEMP[1], CONST[8], IN[1].xxxx 12: MAD TEMP[1], CONST[9], IN[1].yyyy, TEMP[1] 13: MAD TEMP[1].xyz, CONST[10], IN[1].zzzz, TEMP[1] 14: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 15: RSQ TEMP[2].x, TEMP[2].xxxx 16: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 17: MOV TEMP[1].xyz, -TEMP[1].xyzx 18: MUL TEMP[2], CONST[8], IN[0].xxxx 19: MAD TEMP[2], CONST[9], IN[0].yyyy, TEMP[2] 20: MAD TEMP[2], CONST[10], IN[0].zzzz, TEMP[2] 21: ADD TEMP[2].xyz, TEMP[2], CONST[11] 22: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[23].xyzz 23: MAX TEMP[4].x, IMM[0].yyyy, TEMP[3].xxxx 24: MUL TEMP[3], TEMP[4].xxxx, CONST[25] 25: DP3 TEMP[4].x, TEMP[1].xyzz, CONST[24].xyzz 26: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 27: MAD TEMP[3], TEMP[4].xxxx, CONST[26], TEMP[3] 28: ADD TEMP[4].xyz, TEMP[2].xyzz, -CONST[27].xyzz 29: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 30: SQRT TEMP[5].x, TEMP[5].xxxx 31: RCP TEMP[6].x, TEMP[5].xxxx 32: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 33: FSLT TEMP[5].x, TEMP[5].xxxx, CONST[27].wwww 34: UCMP TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx, TEMP[6].xxxx 35: NOT TEMP[5].x, TEMP[5].xxxx 36: UCMP TEMP[6].x, TEMP[5].xxxx, IMM[0].yyyy, TEMP[6].xxxx 37: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[4].xyzz 38: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 39: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 40: MAD TEMP[3], TEMP[4].xxxx, CONST[29], TEMP[3] 41: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[28].xyzz 42: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz 43: SQRT TEMP[4].x, TEMP[4].xxxx 44: RCP TEMP[5].x, TEMP[4].xxxx 45: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx 46: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[28].wwww 47: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[5].xxxx 48: NOT TEMP[4].x, TEMP[4].xxxx 49: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy, TEMP[5].xxxx 50: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[2].xyzz 51: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 52: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx 53: MAD TEMP[3], TEMP[1].xxxx, CONST[30], TEMP[3] 54: MUL TEMP[3], TEMP[3], IN[3] 55: ADD TEMP[3], TEMP[3], CONST[22] 56: MIN TEMP[1].xyz, IMM[0].xxxx, TEMP[3] 57: MOV TEMP[3].xyz, TEMP[1].xyzx 58: MOV TEMP[3].w, IN[3].wwww 59: MOV OUT[2], TEMP[3] 60: MOV OUT[3].z, TEMP[0].xxxx 61: MOV OUT[3].xy, IN[2].xyxx 62: END radeonsi: Compiling shader 17 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %18 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %19 = load <4 x i32>, <4 x i32> addrspace(2)* %18, align 16, !invariant.load !0 %20 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %19, i32 %14, i32 0, i1 false, i1 false) #2 %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %25 = bitcast <16 x i8> addrspace(2)* %24 to <4 x i32> addrspace(2)* %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %15, i32 0, i1 false, i1 false) #2 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %32 = bitcast <16 x i8> addrspace(2)* %31 to <4 x i32> addrspace(2)* %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0 %34 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %33, i32 %16, i32 0, i1 false, i1 false) #2 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %38 = bitcast <16 x i8> addrspace(2)* %37 to <4 x i32> addrspace(2)* %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !invariant.load !0 %40 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %39, i32 %17, i32 0, i1 false, i1 false) #2 %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 200) %48 = fmul nsz float %47, %21 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 216) %50 = fmul nsz float %49, %22 %51 = fadd nsz float %50, %48 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !invariant.load !0 %54 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 232) %55 = fmul nsz float %54, %23 %56 = fadd nsz float %55, %51 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 248) %58 = fadd nsz float %56, %57 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !invariant.load !0 %61 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 320) %62 = fsub nsz float %58, %61 %63 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 336) %64 = fmul nsz float %62, %63 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 256) %66 = fmul nsz float %65, %21 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !invariant.load !0 %69 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 260) %70 = fmul nsz float %69, %21 %71 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 264) %72 = fmul nsz float %71, %21 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 268) %74 = fmul nsz float %73, %21 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !invariant.load !0 %77 = call nsz float @llvm.SI.load.const(<16 x i8> %76, i32 272) %78 = fmul nsz float %77, %22 %79 = fadd nsz float %78, %66 %80 = call nsz float @llvm.SI.load.const(<16 x i8> %76, i32 276) %81 = fmul nsz float %80, %22 %82 = fadd nsz float %81, %70 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !invariant.load !0 %85 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 280) %86 = fmul nsz float %85, %22 %87 = fadd nsz float %86, %72 %88 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 284) %89 = fmul nsz float %88, %22 %90 = fadd nsz float %89, %74 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 288) %94 = fmul nsz float %93, %23 %95 = fadd nsz float %94, %79 %96 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 292) %97 = fmul nsz float %96, %23 %98 = fadd nsz float %97, %82 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !invariant.load !0 %101 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 296) %102 = fmul nsz float %101, %23 %103 = fadd nsz float %102, %87 %104 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 300) %105 = fmul nsz float %104, %23 %106 = fadd nsz float %105, %90 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !invariant.load !0 %109 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 304) %110 = fadd nsz float %95, %109 %111 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 308) %112 = fadd nsz float %98, %111 %113 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 312) %114 = fadd nsz float %103, %113 %115 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !invariant.load !0 %117 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 316) %118 = fadd nsz float %106, %117 %119 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 128) %120 = fmul nsz float %119, %28 %121 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 132) %122 = fmul nsz float %121, %28 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 136) %126 = fmul nsz float %125, %28 %127 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 144) %128 = fmul nsz float %127, %29 %129 = fadd nsz float %128, %120 %130 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %131 = load <16 x i8>, <16 x i8> addrspace(2)* %130, align 16, !invariant.load !0 %132 = call nsz float @llvm.SI.load.const(<16 x i8> %131, i32 148) %133 = fmul nsz float %132, %29 %134 = fadd nsz float %133, %122 %135 = call nsz float @llvm.SI.load.const(<16 x i8> %131, i32 152) %136 = fmul nsz float %135, %29 %137 = fadd nsz float %136, %126 %138 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %139 = load <16 x i8>, <16 x i8> addrspace(2)* %138, align 16, !invariant.load !0 %140 = call nsz float @llvm.SI.load.const(<16 x i8> %139, i32 160) %141 = fmul nsz float %140, %30 %142 = fadd nsz float %141, %129 %143 = call nsz float @llvm.SI.load.const(<16 x i8> %139, i32 164) %144 = fmul nsz float %143, %30 %145 = fadd nsz float %144, %134 %146 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %147 = load <16 x i8>, <16 x i8> addrspace(2)* %146, align 16, !invariant.load !0 %148 = call nsz float @llvm.SI.load.const(<16 x i8> %147, i32 168) %149 = fmul nsz float %148, %30 %150 = fadd nsz float %149, %137 %151 = fmul nsz float %142, %142 %152 = fmul nsz float %145, %145 %153 = fadd nsz float %152, %151 %154 = fmul nsz float %150, %150 %155 = fadd nsz float %153, %154 %156 = call nsz float @llvm.sqrt.f32(float %155) #2 %157 = fdiv nsz float 1.000000e+00, %156, !fpmath !1 %158 = fmul nsz float %142, %157 %159 = fmul nsz float %145, %157 %160 = fmul nsz float %150, %157 %161 = fsub nsz float -0.000000e+00, %158 %162 = fsub nsz float -0.000000e+00, %159 %163 = fsub nsz float -0.000000e+00, %160 %164 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %165 = load <16 x i8>, <16 x i8> addrspace(2)* %164, align 16, !invariant.load !0 %166 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 128) %167 = fmul nsz float %166, %21 %168 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 132) %169 = fmul nsz float %168, %21 %170 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 136) %171 = fmul nsz float %170, %21 %172 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %173 = load <16 x i8>, <16 x i8> addrspace(2)* %172, align 16, !invariant.load !0 %174 = call nsz float @llvm.SI.load.const(<16 x i8> %173, i32 144) %175 = fmul nsz float %174, %22 %176 = fadd nsz float %175, %167 %177 = call nsz float @llvm.SI.load.const(<16 x i8> %173, i32 148) %178 = fmul nsz float %177, %22 %179 = fadd nsz float %178, %169 %180 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %181 = load <16 x i8>, <16 x i8> addrspace(2)* %180, align 16, !invariant.load !0 %182 = call nsz float @llvm.SI.load.const(<16 x i8> %181, i32 152) %183 = fmul nsz float %182, %22 %184 = fadd nsz float %183, %171 %185 = call nsz float @llvm.SI.load.const(<16 x i8> %181, i32 160) %186 = fmul nsz float %185, %23 %187 = fadd nsz float %186, %176 %188 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %189 = load <16 x i8>, <16 x i8> addrspace(2)* %188, align 16, !invariant.load !0 %190 = call nsz float @llvm.SI.load.const(<16 x i8> %189, i32 164) %191 = fmul nsz float %190, %23 %192 = fadd nsz float %191, %179 %193 = call nsz float @llvm.SI.load.const(<16 x i8> %189, i32 168) %194 = fmul nsz float %193, %23 %195 = fadd nsz float %194, %184 %196 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %197 = load <16 x i8>, <16 x i8> addrspace(2)* %196, align 16, !invariant.load !0 %198 = call nsz float @llvm.SI.load.const(<16 x i8> %197, i32 176) %199 = fadd nsz float %187, %198 %200 = call nsz float @llvm.SI.load.const(<16 x i8> %197, i32 180) %201 = fadd nsz float %192, %200 %202 = call nsz float @llvm.SI.load.const(<16 x i8> %197, i32 184) %203 = fadd nsz float %195, %202 %204 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %205 = load <16 x i8>, <16 x i8> addrspace(2)* %204, align 16, !invariant.load !0 %206 = call nsz float @llvm.SI.load.const(<16 x i8> %205, i32 368) %207 = call nsz float @llvm.SI.load.const(<16 x i8> %205, i32 372) %208 = call nsz float @llvm.SI.load.const(<16 x i8> %205, i32 376) %209 = fmul nsz float %206, %161 %210 = fmul nsz float %207, %162 %211 = fadd nsz float %210, %209 %212 = fmul nsz float %208, %163 %213 = fadd nsz float %211, %212 %214 = call nsz float @llvm.maxnum.f32(float %213, float 0.000000e+00) #2 %215 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %216 = load <16 x i8>, <16 x i8> addrspace(2)* %215, align 16, !invariant.load !0 %217 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 400) %218 = fmul nsz float %214, %217 %219 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 404) %220 = fmul nsz float %214, %219 %221 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 408) %222 = fmul nsz float %214, %221 %223 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %224 = load <16 x i8>, <16 x i8> addrspace(2)* %223, align 16, !invariant.load !0 %225 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 384) %226 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 388) %227 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 392) %228 = fmul nsz float %225, %161 %229 = fmul nsz float %226, %162 %230 = fadd nsz float %229, %228 %231 = fmul nsz float %227, %163 %232 = fadd nsz float %230, %231 %233 = call nsz float @llvm.maxnum.f32(float %232, float 0.000000e+00) #2 %234 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %235 = load <16 x i8>, <16 x i8> addrspace(2)* %234, align 16, !invariant.load !0 %236 = call nsz float @llvm.SI.load.const(<16 x i8> %235, i32 416) %237 = fmul nsz float %233, %236 %238 = fadd nsz float %237, %218 %239 = call nsz float @llvm.SI.load.const(<16 x i8> %235, i32 420) %240 = fmul nsz float %233, %239 %241 = fadd nsz float %240, %220 %242 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %243 = load <16 x i8>, <16 x i8> addrspace(2)* %242, align 16, !invariant.load !0 %244 = call nsz float @llvm.SI.load.const(<16 x i8> %243, i32 424) %245 = fmul nsz float %233, %244 %246 = fadd nsz float %245, %222 %247 = call nsz float @llvm.SI.load.const(<16 x i8> %243, i32 432) %248 = fsub nsz float %199, %247 %249 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %250 = load <16 x i8>, <16 x i8> addrspace(2)* %249, align 16, !invariant.load !0 %251 = call nsz float @llvm.SI.load.const(<16 x i8> %250, i32 436) %252 = fsub nsz float %201, %251 %253 = call nsz float @llvm.SI.load.const(<16 x i8> %250, i32 440) %254 = fsub nsz float %203, %253 %255 = fmul nsz float %248, %248 %256 = fmul nsz float %252, %252 %257 = fadd nsz float %256, %255 %258 = fmul nsz float %254, %254 %259 = fadd nsz float %257, %258 %260 = call nsz float @llvm.sqrt.f32(float %259) #2 %261 = fdiv nsz float 1.000000e+00, %260, !fpmath !1 %262 = fmul nsz float %248, %261 %263 = fmul nsz float %252, %261 %264 = fmul nsz float %254, %261 %265 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %266 = load <16 x i8>, <16 x i8> addrspace(2)* %265, align 16, !invariant.load !0 %267 = call nsz float @llvm.SI.load.const(<16 x i8> %266, i32 444) %268 = fcmp nsz olt float %260, %267 %269 = select i1 %268, float 1.000000e+00, float 0.000000e+00 %270 = fmul nsz float %262, %161 %271 = fmul nsz float %263, %162 %272 = fadd nsz float %271, %270 %273 = fmul nsz float %264, %163 %274 = fadd nsz float %272, %273 %275 = call nsz float @llvm.maxnum.f32(float %274, float 0.000000e+00) #2 %276 = fmul nsz float %275, %269 %277 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %278 = load <16 x i8>, <16 x i8> addrspace(2)* %277, align 16, !invariant.load !0 %279 = call nsz float @llvm.SI.load.const(<16 x i8> %278, i32 464) %280 = fmul nsz float %276, %279 %281 = fadd nsz float %280, %238 %282 = call nsz float @llvm.SI.load.const(<16 x i8> %278, i32 468) %283 = fmul nsz float %276, %282 %284 = fadd nsz float %283, %241 %285 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %286 = load <16 x i8>, <16 x i8> addrspace(2)* %285, align 16, !invariant.load !0 %287 = call nsz float @llvm.SI.load.const(<16 x i8> %286, i32 472) %288 = fmul nsz float %276, %287 %289 = fadd nsz float %288, %246 %290 = call nsz float @llvm.SI.load.const(<16 x i8> %286, i32 448) %291 = fsub nsz float %199, %290 %292 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %293 = load <16 x i8>, <16 x i8> addrspace(2)* %292, align 16, !invariant.load !0 %294 = call nsz float @llvm.SI.load.const(<16 x i8> %293, i32 452) %295 = fsub nsz float %201, %294 %296 = call nsz float @llvm.SI.load.const(<16 x i8> %293, i32 456) %297 = fsub nsz float %203, %296 %298 = fmul nsz float %291, %291 %299 = fmul nsz float %295, %295 %300 = fadd nsz float %299, %298 %301 = fmul nsz float %297, %297 %302 = fadd nsz float %300, %301 %303 = call nsz float @llvm.sqrt.f32(float %302) #2 %304 = fdiv nsz float 1.000000e+00, %303, !fpmath !1 %305 = fmul nsz float %291, %304 %306 = fmul nsz float %295, %304 %307 = fmul nsz float %297, %304 %308 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %309 = load <16 x i8>, <16 x i8> addrspace(2)* %308, align 16, !invariant.load !0 %310 = call nsz float @llvm.SI.load.const(<16 x i8> %309, i32 460) %311 = fcmp nsz olt float %303, %310 %312 = select i1 %311, float 1.000000e+00, float 0.000000e+00 %313 = fmul nsz float %305, %161 %314 = fmul nsz float %306, %162 %315 = fadd nsz float %314, %313 %316 = fmul nsz float %307, %163 %317 = fadd nsz float %315, %316 %318 = call nsz float @llvm.maxnum.f32(float %317, float 0.000000e+00) #2 %319 = fmul nsz float %318, %312 %320 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %321 = load <16 x i8>, <16 x i8> addrspace(2)* %320, align 16, !invariant.load !0 %322 = call nsz float @llvm.SI.load.const(<16 x i8> %321, i32 480) %323 = fmul nsz float %319, %322 %324 = fadd nsz float %323, %281 %325 = call nsz float @llvm.SI.load.const(<16 x i8> %321, i32 484) %326 = fmul nsz float %319, %325 %327 = fadd nsz float %326, %284 %328 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %329 = load <16 x i8>, <16 x i8> addrspace(2)* %328, align 16, !invariant.load !0 %330 = call nsz float @llvm.SI.load.const(<16 x i8> %329, i32 488) %331 = fmul nsz float %319, %330 %332 = fadd nsz float %331, %289 %333 = fmul nsz float %324, %41 %334 = fmul nsz float %327, %42 %335 = fmul nsz float %332, %43 %336 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %337 = load <16 x i8>, <16 x i8> addrspace(2)* %336, align 16, !invariant.load !0 %338 = call nsz float @llvm.SI.load.const(<16 x i8> %337, i32 352) %339 = fadd nsz float %333, %338 %340 = call nsz float @llvm.SI.load.const(<16 x i8> %337, i32 356) %341 = fadd nsz float %334, %340 %342 = call nsz float @llvm.SI.load.const(<16 x i8> %337, i32 360) %343 = fadd nsz float %335, %342 %344 = call nsz float @llvm.minnum.f32(float %339, float 1.000000e+00) #2 %345 = call nsz float @llvm.minnum.f32(float %341, float 1.000000e+00) #2 %346 = call nsz float @llvm.minnum.f32(float %343, float 1.000000e+00) #2 %347 = bitcast i32 %12 to float %348 = insertvalue <{ float, float, float }> undef, float %347, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %344, float %345, float %346, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %35, float %36, float %64, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %110, float %112, float %114, float %118) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %348 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} !1 = !{float 2.500000e+00} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].xy, GENERIC[1] DCL CONST[0..36] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {0, 1, 2, 3} IMM[2] INT32 {4, 0, 0, 0} 0: MUL TEMP[0], CONST[16], IN[0].xxxx 1: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0] 3: ADD OUT[0], TEMP[0], CONST[19] 4: MOV OUT[1].x, IMM[0].xxxx 5: MUL TEMP[0], CONST[8], IN[1].xxxx 6: MAD TEMP[0], CONST[9], IN[1].yyyy, TEMP[0] 7: MAD TEMP[0].xyz, CONST[10], IN[1].zzzz, TEMP[0] 8: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 9: RSQ TEMP[1].x, TEMP[1].xxxx 10: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 11: MOV TEMP[0].xyz, -TEMP[0].xyzx 12: MUL TEMP[1], CONST[8], IN[0].xxxx 13: MAD TEMP[1], CONST[9], IN[0].yyyy, TEMP[1] 14: MAD TEMP[1], CONST[10], IN[0].zzzz, TEMP[1] 15: ADD TEMP[1].xyz, TEMP[1], CONST[11] 16: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[21].xyzz 17: MAX TEMP[3].x, IMM[0].yyyy, TEMP[2].xxxx 18: MUL TEMP[2], TEMP[3].xxxx, CONST[25] 19: DP3 TEMP[3].x, TEMP[0].xyzz, CONST[22].xyzz 20: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 21: MAD TEMP[2], TEMP[3].xxxx, CONST[26], TEMP[2] 22: DP3 TEMP[3].x, TEMP[0].xyzz, CONST[23].xyzz 23: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 24: MAD TEMP[2], TEMP[3].xxxx, CONST[27], TEMP[2] 25: DP3 TEMP[3].x, TEMP[0].xyzz, CONST[24].xyzz 26: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 27: MAD TEMP[2], TEMP[3].xxxx, CONST[28], TEMP[2] 28: ADD TEMP[3].xyz, TEMP[1].xyzz, -CONST[29].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: SQRT TEMP[4].x, TEMP[4].xxxx 31: RCP TEMP[5].x, TEMP[4].xxxx 32: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx 33: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[29].wwww 34: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[5].xxxx 35: NOT TEMP[4].x, TEMP[4].xxxx 36: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy, TEMP[5].xxxx 37: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[3].xyzz 38: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 39: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx 40: MAD TEMP[2], TEMP[3].xxxx, CONST[33], TEMP[2] 41: ADD TEMP[3].xyz, TEMP[1].xyzz, -CONST[30].xyzz 42: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 43: SQRT TEMP[4].x, TEMP[4].xxxx 44: RCP TEMP[5].x, TEMP[4].xxxx 45: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx 46: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[30].wwww 47: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[5].xxxx 48: NOT TEMP[4].x, TEMP[4].xxxx 49: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy, TEMP[5].xxxx 50: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[3].xyzz 51: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 52: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx 53: MAD TEMP[2], TEMP[3].xxxx, CONST[34], TEMP[2] 54: ADD TEMP[3].xyz, TEMP[1].xyzz, -CONST[31].xyzz 55: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 56: SQRT TEMP[4].x, TEMP[4].xxxx 57: RCP TEMP[5].x, TEMP[4].xxxx 58: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx 59: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[31].wwww 60: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[5].xxxx 61: NOT TEMP[4].x, TEMP[4].xxxx 62: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy, TEMP[5].xxxx 63: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[3].xyzz 64: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 65: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx 66: MAD TEMP[2], TEMP[3].xxxx, CONST[35], TEMP[2] 67: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[32].xyzz 68: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz 69: SQRT TEMP[3].x, TEMP[3].xxxx 70: RCP TEMP[4].x, TEMP[3].xxxx 71: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx 72: FSLT TEMP[3].x, TEMP[3].xxxx, CONST[32].wwww 73: UCMP TEMP[4].x, TEMP[3].xxxx, IMM[0].xxxx, TEMP[4].xxxx 74: NOT TEMP[3].x, TEMP[3].xxxx 75: UCMP TEMP[4].x, TEMP[3].xxxx, IMM[0].yyyy, TEMP[4].xxxx 76: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[1].xyzz 77: MAX TEMP[0].x, IMM[0].yyyy, TEMP[0].xxxx 78: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 79: MAD TEMP[2], TEMP[0].xxxx, CONST[36], TEMP[2] 80: MUL TEMP[2], TEMP[2], IN[3] 81: ADD TEMP[2], TEMP[2], CONST[20] 82: MIN TEMP[0].xyz, IMM[0].xxxx, TEMP[2] 83: MOV TEMP[2].xyz, TEMP[0].xyzx 84: MOV TEMP[2].w, IN[3].wwww 85: MOV OUT[2], TEMP[2] 86: MOV OUT[3].xy, IN[2].xyxx 87: END radeonsi: Compiling shader 18 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %18 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %19 = load <4 x i32>, <4 x i32> addrspace(2)* %18, align 16, !invariant.load !0 %20 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %19, i32 %14, i32 0, i1 false, i1 false) #2 %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %25 = bitcast <16 x i8> addrspace(2)* %24 to <4 x i32> addrspace(2)* %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %15, i32 0, i1 false, i1 false) #2 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %32 = bitcast <16 x i8> addrspace(2)* %31 to <4 x i32> addrspace(2)* %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0 %34 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %33, i32 %16, i32 0, i1 false, i1 false) #2 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %38 = bitcast <16 x i8> addrspace(2)* %37 to <4 x i32> addrspace(2)* %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !invariant.load !0 %40 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %39, i32 %17, i32 0, i1 false, i1 false) #2 %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 256) %48 = fmul nsz float %47, %21 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 260) %50 = fmul nsz float %49, %21 %51 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 264) %52 = fmul nsz float %51, %21 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 268) %56 = fmul nsz float %55, %21 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 272) %58 = fmul nsz float %57, %22 %59 = fadd nsz float %58, %48 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !invariant.load !0 %62 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 276) %63 = fmul nsz float %62, %22 %64 = fadd nsz float %63, %50 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 280) %66 = fmul nsz float %65, %22 %67 = fadd nsz float %66, %52 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !invariant.load !0 %70 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 284) %71 = fmul nsz float %70, %22 %72 = fadd nsz float %71, %56 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 288) %74 = fmul nsz float %73, %23 %75 = fadd nsz float %74, %59 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !invariant.load !0 %78 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 292) %79 = fmul nsz float %78, %23 %80 = fadd nsz float %79, %64 %81 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 296) %82 = fmul nsz float %81, %23 %83 = fadd nsz float %82, %67 %84 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !invariant.load !0 %86 = call nsz float @llvm.SI.load.const(<16 x i8> %85, i32 300) %87 = fmul nsz float %86, %23 %88 = fadd nsz float %87, %72 %89 = call nsz float @llvm.SI.load.const(<16 x i8> %85, i32 304) %90 = fadd nsz float %75, %89 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 308) %94 = fadd nsz float %80, %93 %95 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 312) %96 = fadd nsz float %83, %95 %97 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 316) %98 = fadd nsz float %88, %97 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !invariant.load !0 %101 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 128) %102 = fmul nsz float %101, %28 %103 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 132) %104 = fmul nsz float %103, %28 %105 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 136) %106 = fmul nsz float %105, %28 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !invariant.load !0 %109 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 144) %110 = fmul nsz float %109, %29 %111 = fadd nsz float %110, %102 %112 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 148) %113 = fmul nsz float %112, %29 %114 = fadd nsz float %113, %104 %115 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !invariant.load !0 %117 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 152) %118 = fmul nsz float %117, %29 %119 = fadd nsz float %118, %106 %120 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 160) %121 = fmul nsz float %120, %30 %122 = fadd nsz float %121, %111 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 164) %126 = fmul nsz float %125, %30 %127 = fadd nsz float %126, %114 %128 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 168) %129 = fmul nsz float %128, %30 %130 = fadd nsz float %129, %119 %131 = fmul nsz float %122, %122 %132 = fmul nsz float %127, %127 %133 = fadd nsz float %132, %131 %134 = fmul nsz float %130, %130 %135 = fadd nsz float %133, %134 %136 = call nsz float @llvm.sqrt.f32(float %135) #2 %137 = fdiv nsz float 1.000000e+00, %136, !fpmath !1 %138 = fmul nsz float %122, %137 %139 = fmul nsz float %127, %137 %140 = fmul nsz float %130, %137 %141 = fsub nsz float -0.000000e+00, %138 %142 = fsub nsz float -0.000000e+00, %139 %143 = fsub nsz float -0.000000e+00, %140 %144 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, align 16, !invariant.load !0 %146 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 128) %147 = fmul nsz float %146, %21 %148 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 132) %149 = fmul nsz float %148, %21 %150 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 136) %151 = fmul nsz float %150, %21 %152 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, align 16, !invariant.load !0 %154 = call nsz float @llvm.SI.load.const(<16 x i8> %153, i32 144) %155 = fmul nsz float %154, %22 %156 = fadd nsz float %155, %147 %157 = call nsz float @llvm.SI.load.const(<16 x i8> %153, i32 148) %158 = fmul nsz float %157, %22 %159 = fadd nsz float %158, %149 %160 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %161 = load <16 x i8>, <16 x i8> addrspace(2)* %160, align 16, !invariant.load !0 %162 = call nsz float @llvm.SI.load.const(<16 x i8> %161, i32 152) %163 = fmul nsz float %162, %22 %164 = fadd nsz float %163, %151 %165 = call nsz float @llvm.SI.load.const(<16 x i8> %161, i32 160) %166 = fmul nsz float %165, %23 %167 = fadd nsz float %166, %156 %168 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %169 = load <16 x i8>, <16 x i8> addrspace(2)* %168, align 16, !invariant.load !0 %170 = call nsz float @llvm.SI.load.const(<16 x i8> %169, i32 164) %171 = fmul nsz float %170, %23 %172 = fadd nsz float %171, %159 %173 = call nsz float @llvm.SI.load.const(<16 x i8> %169, i32 168) %174 = fmul nsz float %173, %23 %175 = fadd nsz float %174, %164 %176 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %177 = load <16 x i8>, <16 x i8> addrspace(2)* %176, align 16, !invariant.load !0 %178 = call nsz float @llvm.SI.load.const(<16 x i8> %177, i32 176) %179 = fadd nsz float %167, %178 %180 = call nsz float @llvm.SI.load.const(<16 x i8> %177, i32 180) %181 = fadd nsz float %172, %180 %182 = call nsz float @llvm.SI.load.const(<16 x i8> %177, i32 184) %183 = fadd nsz float %175, %182 %184 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %185 = load <16 x i8>, <16 x i8> addrspace(2)* %184, align 16, !invariant.load !0 %186 = call nsz float @llvm.SI.load.const(<16 x i8> %185, i32 336) %187 = call nsz float @llvm.SI.load.const(<16 x i8> %185, i32 340) %188 = call nsz float @llvm.SI.load.const(<16 x i8> %185, i32 344) %189 = fmul nsz float %186, %141 %190 = fmul nsz float %187, %142 %191 = fadd nsz float %190, %189 %192 = fmul nsz float %188, %143 %193 = fadd nsz float %191, %192 %194 = call nsz float @llvm.maxnum.f32(float %193, float 0.000000e+00) #2 %195 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %196 = load <16 x i8>, <16 x i8> addrspace(2)* %195, align 16, !invariant.load !0 %197 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 400) %198 = fmul nsz float %194, %197 %199 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 404) %200 = fmul nsz float %194, %199 %201 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 408) %202 = fmul nsz float %194, %201 %203 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %204 = load <16 x i8>, <16 x i8> addrspace(2)* %203, align 16, !invariant.load !0 %205 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 352) %206 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 356) %207 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 360) %208 = fmul nsz float %205, %141 %209 = fmul nsz float %206, %142 %210 = fadd nsz float %209, %208 %211 = fmul nsz float %207, %143 %212 = fadd nsz float %210, %211 %213 = call nsz float @llvm.maxnum.f32(float %212, float 0.000000e+00) #2 %214 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %215 = load <16 x i8>, <16 x i8> addrspace(2)* %214, align 16, !invariant.load !0 %216 = call nsz float @llvm.SI.load.const(<16 x i8> %215, i32 416) %217 = fmul nsz float %213, %216 %218 = fadd nsz float %217, %198 %219 = call nsz float @llvm.SI.load.const(<16 x i8> %215, i32 420) %220 = fmul nsz float %213, %219 %221 = fadd nsz float %220, %200 %222 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %223 = load <16 x i8>, <16 x i8> addrspace(2)* %222, align 16, !invariant.load !0 %224 = call nsz float @llvm.SI.load.const(<16 x i8> %223, i32 424) %225 = fmul nsz float %213, %224 %226 = fadd nsz float %225, %202 %227 = call nsz float @llvm.SI.load.const(<16 x i8> %223, i32 368) %228 = call nsz float @llvm.SI.load.const(<16 x i8> %223, i32 372) %229 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %230 = load <16 x i8>, <16 x i8> addrspace(2)* %229, align 16, !invariant.load !0 %231 = call nsz float @llvm.SI.load.const(<16 x i8> %230, i32 376) %232 = fmul nsz float %227, %141 %233 = fmul nsz float %228, %142 %234 = fadd nsz float %233, %232 %235 = fmul nsz float %231, %143 %236 = fadd nsz float %234, %235 %237 = call nsz float @llvm.maxnum.f32(float %236, float 0.000000e+00) #2 %238 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %239 = load <16 x i8>, <16 x i8> addrspace(2)* %238, align 16, !invariant.load !0 %240 = call nsz float @llvm.SI.load.const(<16 x i8> %239, i32 432) %241 = fmul nsz float %237, %240 %242 = fadd nsz float %241, %218 %243 = call nsz float @llvm.SI.load.const(<16 x i8> %239, i32 436) %244 = fmul nsz float %237, %243 %245 = fadd nsz float %244, %221 %246 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %247 = load <16 x i8>, <16 x i8> addrspace(2)* %246, align 16, !invariant.load !0 %248 = call nsz float @llvm.SI.load.const(<16 x i8> %247, i32 440) %249 = fmul nsz float %237, %248 %250 = fadd nsz float %249, %226 %251 = call nsz float @llvm.SI.load.const(<16 x i8> %247, i32 384) %252 = call nsz float @llvm.SI.load.const(<16 x i8> %247, i32 388) %253 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %254 = load <16 x i8>, <16 x i8> addrspace(2)* %253, align 16, !invariant.load !0 %255 = call nsz float @llvm.SI.load.const(<16 x i8> %254, i32 392) %256 = fmul nsz float %251, %141 %257 = fmul nsz float %252, %142 %258 = fadd nsz float %257, %256 %259 = fmul nsz float %255, %143 %260 = fadd nsz float %258, %259 %261 = call nsz float @llvm.maxnum.f32(float %260, float 0.000000e+00) #2 %262 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %263 = load <16 x i8>, <16 x i8> addrspace(2)* %262, align 16, !invariant.load !0 %264 = call nsz float @llvm.SI.load.const(<16 x i8> %263, i32 448) %265 = fmul nsz float %261, %264 %266 = fadd nsz float %265, %242 %267 = call nsz float @llvm.SI.load.const(<16 x i8> %263, i32 452) %268 = fmul nsz float %261, %267 %269 = fadd nsz float %268, %245 %270 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %271 = load <16 x i8>, <16 x i8> addrspace(2)* %270, align 16, !invariant.load !0 %272 = call nsz float @llvm.SI.load.const(<16 x i8> %271, i32 456) %273 = fmul nsz float %261, %272 %274 = fadd nsz float %273, %250 %275 = call nsz float @llvm.SI.load.const(<16 x i8> %271, i32 464) %276 = fsub nsz float %179, %275 %277 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %278 = load <16 x i8>, <16 x i8> addrspace(2)* %277, align 16, !invariant.load !0 %279 = call nsz float @llvm.SI.load.const(<16 x i8> %278, i32 468) %280 = fsub nsz float %181, %279 %281 = call nsz float @llvm.SI.load.const(<16 x i8> %278, i32 472) %282 = fsub nsz float %183, %281 %283 = fmul nsz float %276, %276 %284 = fmul nsz float %280, %280 %285 = fadd nsz float %284, %283 %286 = fmul nsz float %282, %282 %287 = fadd nsz float %285, %286 %288 = call nsz float @llvm.sqrt.f32(float %287) #2 %289 = fdiv nsz float 1.000000e+00, %288, !fpmath !1 %290 = fmul nsz float %276, %289 %291 = fmul nsz float %280, %289 %292 = fmul nsz float %282, %289 %293 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %294 = load <16 x i8>, <16 x i8> addrspace(2)* %293, align 16, !invariant.load !0 %295 = call nsz float @llvm.SI.load.const(<16 x i8> %294, i32 476) %296 = fcmp nsz olt float %288, %295 %297 = select i1 %296, float 1.000000e+00, float 0.000000e+00 %298 = fmul nsz float %290, %141 %299 = fmul nsz float %291, %142 %300 = fadd nsz float %299, %298 %301 = fmul nsz float %292, %143 %302 = fadd nsz float %300, %301 %303 = call nsz float @llvm.maxnum.f32(float %302, float 0.000000e+00) #2 %304 = fmul nsz float %303, %297 %305 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %306 = load <16 x i8>, <16 x i8> addrspace(2)* %305, align 16, !invariant.load !0 %307 = call nsz float @llvm.SI.load.const(<16 x i8> %306, i32 528) %308 = fmul nsz float %304, %307 %309 = fadd nsz float %308, %266 %310 = call nsz float @llvm.SI.load.const(<16 x i8> %306, i32 532) %311 = fmul nsz float %304, %310 %312 = fadd nsz float %311, %269 %313 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %314 = load <16 x i8>, <16 x i8> addrspace(2)* %313, align 16, !invariant.load !0 %315 = call nsz float @llvm.SI.load.const(<16 x i8> %314, i32 536) %316 = fmul nsz float %304, %315 %317 = fadd nsz float %316, %274 %318 = call nsz float @llvm.SI.load.const(<16 x i8> %314, i32 480) %319 = fsub nsz float %179, %318 %320 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %321 = load <16 x i8>, <16 x i8> addrspace(2)* %320, align 16, !invariant.load !0 %322 = call nsz float @llvm.SI.load.const(<16 x i8> %321, i32 484) %323 = fsub nsz float %181, %322 %324 = call nsz float @llvm.SI.load.const(<16 x i8> %321, i32 488) %325 = fsub nsz float %183, %324 %326 = fmul nsz float %319, %319 %327 = fmul nsz float %323, %323 %328 = fadd nsz float %327, %326 %329 = fmul nsz float %325, %325 %330 = fadd nsz float %328, %329 %331 = call nsz float @llvm.sqrt.f32(float %330) #2 %332 = fdiv nsz float 1.000000e+00, %331, !fpmath !1 %333 = fmul nsz float %319, %332 %334 = fmul nsz float %323, %332 %335 = fmul nsz float %325, %332 %336 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %337 = load <16 x i8>, <16 x i8> addrspace(2)* %336, align 16, !invariant.load !0 %338 = call nsz float @llvm.SI.load.const(<16 x i8> %337, i32 492) %339 = fcmp nsz olt float %331, %338 %340 = select i1 %339, float 1.000000e+00, float 0.000000e+00 %341 = fmul nsz float %333, %141 %342 = fmul nsz float %334, %142 %343 = fadd nsz float %342, %341 %344 = fmul nsz float %335, %143 %345 = fadd nsz float %343, %344 %346 = call nsz float @llvm.maxnum.f32(float %345, float 0.000000e+00) #2 %347 = fmul nsz float %346, %340 %348 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %349 = load <16 x i8>, <16 x i8> addrspace(2)* %348, align 16, !invariant.load !0 %350 = call nsz float @llvm.SI.load.const(<16 x i8> %349, i32 544) %351 = fmul nsz float %347, %350 %352 = fadd nsz float %351, %309 %353 = call nsz float @llvm.SI.load.const(<16 x i8> %349, i32 548) %354 = fmul nsz float %347, %353 %355 = fadd nsz float %354, %312 %356 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %357 = load <16 x i8>, <16 x i8> addrspace(2)* %356, align 16, !invariant.load !0 %358 = call nsz float @llvm.SI.load.const(<16 x i8> %357, i32 552) %359 = fmul nsz float %347, %358 %360 = fadd nsz float %359, %317 %361 = call nsz float @llvm.SI.load.const(<16 x i8> %357, i32 496) %362 = fsub nsz float %179, %361 %363 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %364 = load <16 x i8>, <16 x i8> addrspace(2)* %363, align 16, !invariant.load !0 %365 = call nsz float @llvm.SI.load.const(<16 x i8> %364, i32 500) %366 = fsub nsz float %181, %365 %367 = call nsz float @llvm.SI.load.const(<16 x i8> %364, i32 504) %368 = fsub nsz float %183, %367 %369 = fmul nsz float %362, %362 %370 = fmul nsz float %366, %366 %371 = fadd nsz float %370, %369 %372 = fmul nsz float %368, %368 %373 = fadd nsz float %371, %372 %374 = call nsz float @llvm.sqrt.f32(float %373) #2 %375 = fdiv nsz float 1.000000e+00, %374, !fpmath !1 %376 = fmul nsz float %362, %375 %377 = fmul nsz float %366, %375 %378 = fmul nsz float %368, %375 %379 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %380 = load <16 x i8>, <16 x i8> addrspace(2)* %379, align 16, !invariant.load !0 %381 = call nsz float @llvm.SI.load.const(<16 x i8> %380, i32 508) %382 = fcmp nsz olt float %374, %381 %383 = select i1 %382, float 1.000000e+00, float 0.000000e+00 %384 = fmul nsz float %376, %141 %385 = fmul nsz float %377, %142 %386 = fadd nsz float %385, %384 %387 = fmul nsz float %378, %143 %388 = fadd nsz float %386, %387 %389 = call nsz float @llvm.maxnum.f32(float %388, float 0.000000e+00) #2 %390 = fmul nsz float %389, %383 %391 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %392 = load <16 x i8>, <16 x i8> addrspace(2)* %391, align 16, !invariant.load !0 %393 = call nsz float @llvm.SI.load.const(<16 x i8> %392, i32 560) %394 = fmul nsz float %390, %393 %395 = fadd nsz float %394, %352 %396 = call nsz float @llvm.SI.load.const(<16 x i8> %392, i32 564) %397 = fmul nsz float %390, %396 %398 = fadd nsz float %397, %355 %399 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %400 = load <16 x i8>, <16 x i8> addrspace(2)* %399, align 16, !invariant.load !0 %401 = call nsz float @llvm.SI.load.const(<16 x i8> %400, i32 568) %402 = fmul nsz float %390, %401 %403 = fadd nsz float %402, %360 %404 = call nsz float @llvm.SI.load.const(<16 x i8> %400, i32 512) %405 = fsub nsz float %179, %404 %406 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %407 = load <16 x i8>, <16 x i8> addrspace(2)* %406, align 16, !invariant.load !0 %408 = call nsz float @llvm.SI.load.const(<16 x i8> %407, i32 516) %409 = fsub nsz float %181, %408 %410 = call nsz float @llvm.SI.load.const(<16 x i8> %407, i32 520) %411 = fsub nsz float %183, %410 %412 = fmul nsz float %405, %405 %413 = fmul nsz float %409, %409 %414 = fadd nsz float %413, %412 %415 = fmul nsz float %411, %411 %416 = fadd nsz float %414, %415 %417 = call nsz float @llvm.sqrt.f32(float %416) #2 %418 = fdiv nsz float 1.000000e+00, %417, !fpmath !1 %419 = fmul nsz float %405, %418 %420 = fmul nsz float %409, %418 %421 = fmul nsz float %411, %418 %422 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %423 = load <16 x i8>, <16 x i8> addrspace(2)* %422, align 16, !invariant.load !0 %424 = call nsz float @llvm.SI.load.const(<16 x i8> %423, i32 524) %425 = fcmp nsz olt float %417, %424 %426 = select i1 %425, float 1.000000e+00, float 0.000000e+00 %427 = fmul nsz float %419, %141 %428 = fmul nsz float %420, %142 %429 = fadd nsz float %428, %427 %430 = fmul nsz float %421, %143 %431 = fadd nsz float %429, %430 %432 = call nsz float @llvm.maxnum.f32(float %431, float 0.000000e+00) #2 %433 = fmul nsz float %432, %426 %434 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %435 = load <16 x i8>, <16 x i8> addrspace(2)* %434, align 16, !invariant.load !0 %436 = call nsz float @llvm.SI.load.const(<16 x i8> %435, i32 576) %437 = fmul nsz float %433, %436 %438 = fadd nsz float %437, %395 %439 = call nsz float @llvm.SI.load.const(<16 x i8> %435, i32 580) %440 = fmul nsz float %433, %439 %441 = fadd nsz float %440, %398 %442 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %443 = load <16 x i8>, <16 x i8> addrspace(2)* %442, align 16, !invariant.load !0 %444 = call nsz float @llvm.SI.load.const(<16 x i8> %443, i32 584) %445 = fmul nsz float %433, %444 %446 = fadd nsz float %445, %403 %447 = fmul nsz float %438, %41 %448 = fmul nsz float %441, %42 %449 = fmul nsz float %446, %43 %450 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %451 = load <16 x i8>, <16 x i8> addrspace(2)* %450, align 16, !invariant.load !0 %452 = call nsz float @llvm.SI.load.const(<16 x i8> %451, i32 320) %453 = fadd nsz float %447, %452 %454 = call nsz float @llvm.SI.load.const(<16 x i8> %451, i32 324) %455 = fadd nsz float %448, %454 %456 = call nsz float @llvm.SI.load.const(<16 x i8> %451, i32 328) %457 = fadd nsz float %449, %456 %458 = call nsz float @llvm.minnum.f32(float %453, float 1.000000e+00) #2 %459 = call nsz float @llvm.minnum.f32(float %455, float 1.000000e+00) #2 %460 = call nsz float @llvm.minnum.f32(float %457, float 1.000000e+00) #2 %461 = bitcast i32 %12 to float %462 = insertvalue <{ float, float, float }> undef, float %461, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %458, float %459, float %460, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %35, float %36, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %90, float %94, float %96, float %98) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %462 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} !1 = !{float 2.500000e+00} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].xyz, GENERIC[1] DCL CONST[0..38] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {0, 1, 2, 3} IMM[2] INT32 {4, 0, 0, 0} 0: MUL TEMP[0], CONST[12], IN[0].xxxx 1: MAD TEMP[0], CONST[13], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[14], IN[0].zzzz, TEMP[0] 3: ADD TEMP[0].z, TEMP[0], CONST[15] 4: ADD TEMP[0].x, TEMP[0].zzzz, -CONST[20].xxxx 5: MUL TEMP[0].x, TEMP[0].xxxx, CONST[21].xxxx 6: MUL TEMP[1], CONST[16], IN[0].xxxx 7: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 9: ADD OUT[0], TEMP[1], CONST[19] 10: MOV OUT[1].x, IMM[0].xxxx 11: MUL TEMP[1], CONST[8], IN[1].xxxx 12: MAD TEMP[1], CONST[9], IN[1].yyyy, TEMP[1] 13: MAD TEMP[1].xyz, CONST[10], IN[1].zzzz, TEMP[1] 14: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 15: RSQ TEMP[2].x, TEMP[2].xxxx 16: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 17: MOV TEMP[1].xyz, -TEMP[1].xyzx 18: MUL TEMP[2], CONST[8], IN[0].xxxx 19: MAD TEMP[2], CONST[9], IN[0].yyyy, TEMP[2] 20: MAD TEMP[2], CONST[10], IN[0].zzzz, TEMP[2] 21: ADD TEMP[2].xyz, TEMP[2], CONST[11] 22: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[23].xyzz 23: MAX TEMP[4].x, IMM[0].yyyy, TEMP[3].xxxx 24: MUL TEMP[3], TEMP[4].xxxx, CONST[27] 25: DP3 TEMP[4].x, TEMP[1].xyzz, CONST[24].xyzz 26: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 27: MAD TEMP[3], TEMP[4].xxxx, CONST[28], TEMP[3] 28: DP3 TEMP[4].x, TEMP[1].xyzz, CONST[25].xyzz 29: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 30: MAD TEMP[3], TEMP[4].xxxx, CONST[29], TEMP[3] 31: DP3 TEMP[4].x, TEMP[1].xyzz, CONST[26].xyzz 32: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 33: MAD TEMP[3], TEMP[4].xxxx, CONST[30], TEMP[3] 34: ADD TEMP[4].xyz, TEMP[2].xyzz, -CONST[31].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: SQRT TEMP[5].x, TEMP[5].xxxx 37: RCP TEMP[6].x, TEMP[5].xxxx 38: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 39: FSLT TEMP[5].x, TEMP[5].xxxx, CONST[31].wwww 40: UCMP TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx, TEMP[6].xxxx 41: NOT TEMP[5].x, TEMP[5].xxxx 42: UCMP TEMP[6].x, TEMP[5].xxxx, IMM[0].yyyy, TEMP[6].xxxx 43: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[4].xyzz 44: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 45: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 46: MAD TEMP[3], TEMP[4].xxxx, CONST[35], TEMP[3] 47: ADD TEMP[4].xyz, TEMP[2].xyzz, -CONST[32].xyzz 48: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 49: SQRT TEMP[5].x, TEMP[5].xxxx 50: RCP TEMP[6].x, TEMP[5].xxxx 51: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 52: FSLT TEMP[5].x, TEMP[5].xxxx, CONST[32].wwww 53: UCMP TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx, TEMP[6].xxxx 54: NOT TEMP[5].x, TEMP[5].xxxx 55: UCMP TEMP[6].x, TEMP[5].xxxx, IMM[0].yyyy, TEMP[6].xxxx 56: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[4].xyzz 57: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 58: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 59: MAD TEMP[3], TEMP[4].xxxx, CONST[36], TEMP[3] 60: ADD TEMP[4].xyz, TEMP[2].xyzz, -CONST[33].xyzz 61: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 62: SQRT TEMP[5].x, TEMP[5].xxxx 63: RCP TEMP[6].x, TEMP[5].xxxx 64: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 65: FSLT TEMP[5].x, TEMP[5].xxxx, CONST[33].wwww 66: UCMP TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx, TEMP[6].xxxx 67: NOT TEMP[5].x, TEMP[5].xxxx 68: UCMP TEMP[6].x, TEMP[5].xxxx, IMM[0].yyyy, TEMP[6].xxxx 69: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[4].xyzz 70: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 71: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 72: MAD TEMP[3], TEMP[4].xxxx, CONST[37], TEMP[3] 73: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[34].xyzz 74: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz 75: SQRT TEMP[4].x, TEMP[4].xxxx 76: RCP TEMP[5].x, TEMP[4].xxxx 77: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx 78: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[34].wwww 79: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[5].xxxx 80: NOT TEMP[4].x, TEMP[4].xxxx 81: UCMP TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy, TEMP[5].xxxx 82: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[2].xyzz 83: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 84: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx 85: MAD TEMP[3], TEMP[1].xxxx, CONST[38], TEMP[3] 86: MUL TEMP[3], TEMP[3], IN[3] 87: ADD TEMP[3], TEMP[3], CONST[22] 88: MIN TEMP[1].xyz, IMM[0].xxxx, TEMP[3] 89: MOV TEMP[3].xyz, TEMP[1].xyzx 90: MOV TEMP[3].w, IN[3].wwww 91: MOV OUT[2], TEMP[3] 92: MOV OUT[3].z, TEMP[0].xxxx 93: MOV OUT[3].xy, IN[2].xyxx 94: END radeonsi: Compiling shader 19 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %18 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %19 = load <4 x i32>, <4 x i32> addrspace(2)* %18, align 16, !invariant.load !0 %20 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %19, i32 %14, i32 0, i1 false, i1 false) #2 %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %25 = bitcast <16 x i8> addrspace(2)* %24 to <4 x i32> addrspace(2)* %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %15, i32 0, i1 false, i1 false) #2 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %32 = bitcast <16 x i8> addrspace(2)* %31 to <4 x i32> addrspace(2)* %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0 %34 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %33, i32 %16, i32 0, i1 false, i1 false) #2 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %38 = bitcast <16 x i8> addrspace(2)* %37 to <4 x i32> addrspace(2)* %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !invariant.load !0 %40 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %39, i32 %17, i32 0, i1 false, i1 false) #2 %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 200) %48 = fmul nsz float %47, %21 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 216) %50 = fmul nsz float %49, %22 %51 = fadd nsz float %50, %48 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !invariant.load !0 %54 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 232) %55 = fmul nsz float %54, %23 %56 = fadd nsz float %55, %51 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 248) %58 = fadd nsz float %56, %57 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !invariant.load !0 %61 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 320) %62 = fsub nsz float %58, %61 %63 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 336) %64 = fmul nsz float %62, %63 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 256) %66 = fmul nsz float %65, %21 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !invariant.load !0 %69 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 260) %70 = fmul nsz float %69, %21 %71 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 264) %72 = fmul nsz float %71, %21 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 268) %74 = fmul nsz float %73, %21 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !invariant.load !0 %77 = call nsz float @llvm.SI.load.const(<16 x i8> %76, i32 272) %78 = fmul nsz float %77, %22 %79 = fadd nsz float %78, %66 %80 = call nsz float @llvm.SI.load.const(<16 x i8> %76, i32 276) %81 = fmul nsz float %80, %22 %82 = fadd nsz float %81, %70 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !invariant.load !0 %85 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 280) %86 = fmul nsz float %85, %22 %87 = fadd nsz float %86, %72 %88 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 284) %89 = fmul nsz float %88, %22 %90 = fadd nsz float %89, %74 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 288) %94 = fmul nsz float %93, %23 %95 = fadd nsz float %94, %79 %96 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 292) %97 = fmul nsz float %96, %23 %98 = fadd nsz float %97, %82 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !invariant.load !0 %101 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 296) %102 = fmul nsz float %101, %23 %103 = fadd nsz float %102, %87 %104 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 300) %105 = fmul nsz float %104, %23 %106 = fadd nsz float %105, %90 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !invariant.load !0 %109 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 304) %110 = fadd nsz float %95, %109 %111 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 308) %112 = fadd nsz float %98, %111 %113 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 312) %114 = fadd nsz float %103, %113 %115 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !invariant.load !0 %117 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 316) %118 = fadd nsz float %106, %117 %119 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 128) %120 = fmul nsz float %119, %28 %121 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 132) %122 = fmul nsz float %121, %28 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 136) %126 = fmul nsz float %125, %28 %127 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 144) %128 = fmul nsz float %127, %29 %129 = fadd nsz float %128, %120 %130 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %131 = load <16 x i8>, <16 x i8> addrspace(2)* %130, align 16, !invariant.load !0 %132 = call nsz float @llvm.SI.load.const(<16 x i8> %131, i32 148) %133 = fmul nsz float %132, %29 %134 = fadd nsz float %133, %122 %135 = call nsz float @llvm.SI.load.const(<16 x i8> %131, i32 152) %136 = fmul nsz float %135, %29 %137 = fadd nsz float %136, %126 %138 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %139 = load <16 x i8>, <16 x i8> addrspace(2)* %138, align 16, !invariant.load !0 %140 = call nsz float @llvm.SI.load.const(<16 x i8> %139, i32 160) %141 = fmul nsz float %140, %30 %142 = fadd nsz float %141, %129 %143 = call nsz float @llvm.SI.load.const(<16 x i8> %139, i32 164) %144 = fmul nsz float %143, %30 %145 = fadd nsz float %144, %134 %146 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %147 = load <16 x i8>, <16 x i8> addrspace(2)* %146, align 16, !invariant.load !0 %148 = call nsz float @llvm.SI.load.const(<16 x i8> %147, i32 168) %149 = fmul nsz float %148, %30 %150 = fadd nsz float %149, %137 %151 = fmul nsz float %142, %142 %152 = fmul nsz float %145, %145 %153 = fadd nsz float %152, %151 %154 = fmul nsz float %150, %150 %155 = fadd nsz float %153, %154 %156 = call nsz float @llvm.sqrt.f32(float %155) #2 %157 = fdiv nsz float 1.000000e+00, %156, !fpmath !1 %158 = fmul nsz float %142, %157 %159 = fmul nsz float %145, %157 %160 = fmul nsz float %150, %157 %161 = fsub nsz float -0.000000e+00, %158 %162 = fsub nsz float -0.000000e+00, %159 %163 = fsub nsz float -0.000000e+00, %160 %164 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %165 = load <16 x i8>, <16 x i8> addrspace(2)* %164, align 16, !invariant.load !0 %166 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 128) %167 = fmul nsz float %166, %21 %168 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 132) %169 = fmul nsz float %168, %21 %170 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 136) %171 = fmul nsz float %170, %21 %172 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %173 = load <16 x i8>, <16 x i8> addrspace(2)* %172, align 16, !invariant.load !0 %174 = call nsz float @llvm.SI.load.const(<16 x i8> %173, i32 144) %175 = fmul nsz float %174, %22 %176 = fadd nsz float %175, %167 %177 = call nsz float @llvm.SI.load.const(<16 x i8> %173, i32 148) %178 = fmul nsz float %177, %22 %179 = fadd nsz float %178, %169 %180 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %181 = load <16 x i8>, <16 x i8> addrspace(2)* %180, align 16, !invariant.load !0 %182 = call nsz float @llvm.SI.load.const(<16 x i8> %181, i32 152) %183 = fmul nsz float %182, %22 %184 = fadd nsz float %183, %171 %185 = call nsz float @llvm.SI.load.const(<16 x i8> %181, i32 160) %186 = fmul nsz float %185, %23 %187 = fadd nsz float %186, %176 %188 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %189 = load <16 x i8>, <16 x i8> addrspace(2)* %188, align 16, !invariant.load !0 %190 = call nsz float @llvm.SI.load.const(<16 x i8> %189, i32 164) %191 = fmul nsz float %190, %23 %192 = fadd nsz float %191, %179 %193 = call nsz float @llvm.SI.load.const(<16 x i8> %189, i32 168) %194 = fmul nsz float %193, %23 %195 = fadd nsz float %194, %184 %196 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %197 = load <16 x i8>, <16 x i8> addrspace(2)* %196, align 16, !invariant.load !0 %198 = call nsz float @llvm.SI.load.const(<16 x i8> %197, i32 176) %199 = fadd nsz float %187, %198 %200 = call nsz float @llvm.SI.load.const(<16 x i8> %197, i32 180) %201 = fadd nsz float %192, %200 %202 = call nsz float @llvm.SI.load.const(<16 x i8> %197, i32 184) %203 = fadd nsz float %195, %202 %204 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %205 = load <16 x i8>, <16 x i8> addrspace(2)* %204, align 16, !invariant.load !0 %206 = call nsz float @llvm.SI.load.const(<16 x i8> %205, i32 368) %207 = call nsz float @llvm.SI.load.const(<16 x i8> %205, i32 372) %208 = call nsz float @llvm.SI.load.const(<16 x i8> %205, i32 376) %209 = fmul nsz float %206, %161 %210 = fmul nsz float %207, %162 %211 = fadd nsz float %210, %209 %212 = fmul nsz float %208, %163 %213 = fadd nsz float %211, %212 %214 = call nsz float @llvm.maxnum.f32(float %213, float 0.000000e+00) #2 %215 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %216 = load <16 x i8>, <16 x i8> addrspace(2)* %215, align 16, !invariant.load !0 %217 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 432) %218 = fmul nsz float %214, %217 %219 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 436) %220 = fmul nsz float %214, %219 %221 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 440) %222 = fmul nsz float %214, %221 %223 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %224 = load <16 x i8>, <16 x i8> addrspace(2)* %223, align 16, !invariant.load !0 %225 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 384) %226 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 388) %227 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 392) %228 = fmul nsz float %225, %161 %229 = fmul nsz float %226, %162 %230 = fadd nsz float %229, %228 %231 = fmul nsz float %227, %163 %232 = fadd nsz float %230, %231 %233 = call nsz float @llvm.maxnum.f32(float %232, float 0.000000e+00) #2 %234 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %235 = load <16 x i8>, <16 x i8> addrspace(2)* %234, align 16, !invariant.load !0 %236 = call nsz float @llvm.SI.load.const(<16 x i8> %235, i32 448) %237 = fmul nsz float %233, %236 %238 = fadd nsz float %237, %218 %239 = call nsz float @llvm.SI.load.const(<16 x i8> %235, i32 452) %240 = fmul nsz float %233, %239 %241 = fadd nsz float %240, %220 %242 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %243 = load <16 x i8>, <16 x i8> addrspace(2)* %242, align 16, !invariant.load !0 %244 = call nsz float @llvm.SI.load.const(<16 x i8> %243, i32 456) %245 = fmul nsz float %233, %244 %246 = fadd nsz float %245, %222 %247 = call nsz float @llvm.SI.load.const(<16 x i8> %243, i32 400) %248 = call nsz float @llvm.SI.load.const(<16 x i8> %243, i32 404) %249 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %250 = load <16 x i8>, <16 x i8> addrspace(2)* %249, align 16, !invariant.load !0 %251 = call nsz float @llvm.SI.load.const(<16 x i8> %250, i32 408) %252 = fmul nsz float %247, %161 %253 = fmul nsz float %248, %162 %254 = fadd nsz float %253, %252 %255 = fmul nsz float %251, %163 %256 = fadd nsz float %254, %255 %257 = call nsz float @llvm.maxnum.f32(float %256, float 0.000000e+00) #2 %258 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %259 = load <16 x i8>, <16 x i8> addrspace(2)* %258, align 16, !invariant.load !0 %260 = call nsz float @llvm.SI.load.const(<16 x i8> %259, i32 464) %261 = fmul nsz float %257, %260 %262 = fadd nsz float %261, %238 %263 = call nsz float @llvm.SI.load.const(<16 x i8> %259, i32 468) %264 = fmul nsz float %257, %263 %265 = fadd nsz float %264, %241 %266 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %267 = load <16 x i8>, <16 x i8> addrspace(2)* %266, align 16, !invariant.load !0 %268 = call nsz float @llvm.SI.load.const(<16 x i8> %267, i32 472) %269 = fmul nsz float %257, %268 %270 = fadd nsz float %269, %246 %271 = call nsz float @llvm.SI.load.const(<16 x i8> %267, i32 416) %272 = call nsz float @llvm.SI.load.const(<16 x i8> %267, i32 420) %273 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %274 = load <16 x i8>, <16 x i8> addrspace(2)* %273, align 16, !invariant.load !0 %275 = call nsz float @llvm.SI.load.const(<16 x i8> %274, i32 424) %276 = fmul nsz float %271, %161 %277 = fmul nsz float %272, %162 %278 = fadd nsz float %277, %276 %279 = fmul nsz float %275, %163 %280 = fadd nsz float %278, %279 %281 = call nsz float @llvm.maxnum.f32(float %280, float 0.000000e+00) #2 %282 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %283 = load <16 x i8>, <16 x i8> addrspace(2)* %282, align 16, !invariant.load !0 %284 = call nsz float @llvm.SI.load.const(<16 x i8> %283, i32 480) %285 = fmul nsz float %281, %284 %286 = fadd nsz float %285, %262 %287 = call nsz float @llvm.SI.load.const(<16 x i8> %283, i32 484) %288 = fmul nsz float %281, %287 %289 = fadd nsz float %288, %265 %290 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %291 = load <16 x i8>, <16 x i8> addrspace(2)* %290, align 16, !invariant.load !0 %292 = call nsz float @llvm.SI.load.const(<16 x i8> %291, i32 488) %293 = fmul nsz float %281, %292 %294 = fadd nsz float %293, %270 %295 = call nsz float @llvm.SI.load.const(<16 x i8> %291, i32 496) %296 = fsub nsz float %199, %295 %297 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %298 = load <16 x i8>, <16 x i8> addrspace(2)* %297, align 16, !invariant.load !0 %299 = call nsz float @llvm.SI.load.const(<16 x i8> %298, i32 500) %300 = fsub nsz float %201, %299 %301 = call nsz float @llvm.SI.load.const(<16 x i8> %298, i32 504) %302 = fsub nsz float %203, %301 %303 = fmul nsz float %296, %296 %304 = fmul nsz float %300, %300 %305 = fadd nsz float %304, %303 %306 = fmul nsz float %302, %302 %307 = fadd nsz float %305, %306 %308 = call nsz float @llvm.sqrt.f32(float %307) #2 %309 = fdiv nsz float 1.000000e+00, %308, !fpmath !1 %310 = fmul nsz float %296, %309 %311 = fmul nsz float %300, %309 %312 = fmul nsz float %302, %309 %313 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %314 = load <16 x i8>, <16 x i8> addrspace(2)* %313, align 16, !invariant.load !0 %315 = call nsz float @llvm.SI.load.const(<16 x i8> %314, i32 508) %316 = fcmp nsz olt float %308, %315 %317 = select i1 %316, float 1.000000e+00, float 0.000000e+00 %318 = fmul nsz float %310, %161 %319 = fmul nsz float %311, %162 %320 = fadd nsz float %319, %318 %321 = fmul nsz float %312, %163 %322 = fadd nsz float %320, %321 %323 = call nsz float @llvm.maxnum.f32(float %322, float 0.000000e+00) #2 %324 = fmul nsz float %323, %317 %325 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %326 = load <16 x i8>, <16 x i8> addrspace(2)* %325, align 16, !invariant.load !0 %327 = call nsz float @llvm.SI.load.const(<16 x i8> %326, i32 560) %328 = fmul nsz float %324, %327 %329 = fadd nsz float %328, %286 %330 = call nsz float @llvm.SI.load.const(<16 x i8> %326, i32 564) %331 = fmul nsz float %324, %330 %332 = fadd nsz float %331, %289 %333 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %334 = load <16 x i8>, <16 x i8> addrspace(2)* %333, align 16, !invariant.load !0 %335 = call nsz float @llvm.SI.load.const(<16 x i8> %334, i32 568) %336 = fmul nsz float %324, %335 %337 = fadd nsz float %336, %294 %338 = call nsz float @llvm.SI.load.const(<16 x i8> %334, i32 512) %339 = fsub nsz float %199, %338 %340 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %341 = load <16 x i8>, <16 x i8> addrspace(2)* %340, align 16, !invariant.load !0 %342 = call nsz float @llvm.SI.load.const(<16 x i8> %341, i32 516) %343 = fsub nsz float %201, %342 %344 = call nsz float @llvm.SI.load.const(<16 x i8> %341, i32 520) %345 = fsub nsz float %203, %344 %346 = fmul nsz float %339, %339 %347 = fmul nsz float %343, %343 %348 = fadd nsz float %347, %346 %349 = fmul nsz float %345, %345 %350 = fadd nsz float %348, %349 %351 = call nsz float @llvm.sqrt.f32(float %350) #2 %352 = fdiv nsz float 1.000000e+00, %351, !fpmath !1 %353 = fmul nsz float %339, %352 %354 = fmul nsz float %343, %352 %355 = fmul nsz float %345, %352 %356 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %357 = load <16 x i8>, <16 x i8> addrspace(2)* %356, align 16, !invariant.load !0 %358 = call nsz float @llvm.SI.load.const(<16 x i8> %357, i32 524) %359 = fcmp nsz olt float %351, %358 %360 = select i1 %359, float 1.000000e+00, float 0.000000e+00 %361 = fmul nsz float %353, %161 %362 = fmul nsz float %354, %162 %363 = fadd nsz float %362, %361 %364 = fmul nsz float %355, %163 %365 = fadd nsz float %363, %364 %366 = call nsz float @llvm.maxnum.f32(float %365, float 0.000000e+00) #2 %367 = fmul nsz float %366, %360 %368 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %369 = load <16 x i8>, <16 x i8> addrspace(2)* %368, align 16, !invariant.load !0 %370 = call nsz float @llvm.SI.load.const(<16 x i8> %369, i32 576) %371 = fmul nsz float %367, %370 %372 = fadd nsz float %371, %329 %373 = call nsz float @llvm.SI.load.const(<16 x i8> %369, i32 580) %374 = fmul nsz float %367, %373 %375 = fadd nsz float %374, %332 %376 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %377 = load <16 x i8>, <16 x i8> addrspace(2)* %376, align 16, !invariant.load !0 %378 = call nsz float @llvm.SI.load.const(<16 x i8> %377, i32 584) %379 = fmul nsz float %367, %378 %380 = fadd nsz float %379, %337 %381 = call nsz float @llvm.SI.load.const(<16 x i8> %377, i32 528) %382 = fsub nsz float %199, %381 %383 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %384 = load <16 x i8>, <16 x i8> addrspace(2)* %383, align 16, !invariant.load !0 %385 = call nsz float @llvm.SI.load.const(<16 x i8> %384, i32 532) %386 = fsub nsz float %201, %385 %387 = call nsz float @llvm.SI.load.const(<16 x i8> %384, i32 536) %388 = fsub nsz float %203, %387 %389 = fmul nsz float %382, %382 %390 = fmul nsz float %386, %386 %391 = fadd nsz float %390, %389 %392 = fmul nsz float %388, %388 %393 = fadd nsz float %391, %392 %394 = call nsz float @llvm.sqrt.f32(float %393) #2 %395 = fdiv nsz float 1.000000e+00, %394, !fpmath !1 %396 = fmul nsz float %382, %395 %397 = fmul nsz float %386, %395 %398 = fmul nsz float %388, %395 %399 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %400 = load <16 x i8>, <16 x i8> addrspace(2)* %399, align 16, !invariant.load !0 %401 = call nsz float @llvm.SI.load.const(<16 x i8> %400, i32 540) %402 = fcmp nsz olt float %394, %401 %403 = select i1 %402, float 1.000000e+00, float 0.000000e+00 %404 = fmul nsz float %396, %161 %405 = fmul nsz float %397, %162 %406 = fadd nsz float %405, %404 %407 = fmul nsz float %398, %163 %408 = fadd nsz float %406, %407 %409 = call nsz float @llvm.maxnum.f32(float %408, float 0.000000e+00) #2 %410 = fmul nsz float %409, %403 %411 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %412 = load <16 x i8>, <16 x i8> addrspace(2)* %411, align 16, !invariant.load !0 %413 = call nsz float @llvm.SI.load.const(<16 x i8> %412, i32 592) %414 = fmul nsz float %410, %413 %415 = fadd nsz float %414, %372 %416 = call nsz float @llvm.SI.load.const(<16 x i8> %412, i32 596) %417 = fmul nsz float %410, %416 %418 = fadd nsz float %417, %375 %419 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %420 = load <16 x i8>, <16 x i8> addrspace(2)* %419, align 16, !invariant.load !0 %421 = call nsz float @llvm.SI.load.const(<16 x i8> %420, i32 600) %422 = fmul nsz float %410, %421 %423 = fadd nsz float %422, %380 %424 = call nsz float @llvm.SI.load.const(<16 x i8> %420, i32 544) %425 = fsub nsz float %199, %424 %426 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %427 = load <16 x i8>, <16 x i8> addrspace(2)* %426, align 16, !invariant.load !0 %428 = call nsz float @llvm.SI.load.const(<16 x i8> %427, i32 548) %429 = fsub nsz float %201, %428 %430 = call nsz float @llvm.SI.load.const(<16 x i8> %427, i32 552) %431 = fsub nsz float %203, %430 %432 = fmul nsz float %425, %425 %433 = fmul nsz float %429, %429 %434 = fadd nsz float %433, %432 %435 = fmul nsz float %431, %431 %436 = fadd nsz float %434, %435 %437 = call nsz float @llvm.sqrt.f32(float %436) #2 %438 = fdiv nsz float 1.000000e+00, %437, !fpmath !1 %439 = fmul nsz float %425, %438 %440 = fmul nsz float %429, %438 %441 = fmul nsz float %431, %438 %442 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %443 = load <16 x i8>, <16 x i8> addrspace(2)* %442, align 16, !invariant.load !0 %444 = call nsz float @llvm.SI.load.const(<16 x i8> %443, i32 556) %445 = fcmp nsz olt float %437, %444 %446 = select i1 %445, float 1.000000e+00, float 0.000000e+00 %447 = fmul nsz float %439, %161 %448 = fmul nsz float %440, %162 %449 = fadd nsz float %448, %447 %450 = fmul nsz float %441, %163 %451 = fadd nsz float %449, %450 %452 = call nsz float @llvm.maxnum.f32(float %451, float 0.000000e+00) #2 %453 = fmul nsz float %452, %446 %454 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %455 = load <16 x i8>, <16 x i8> addrspace(2)* %454, align 16, !invariant.load !0 %456 = call nsz float @llvm.SI.load.const(<16 x i8> %455, i32 608) %457 = fmul nsz float %453, %456 %458 = fadd nsz float %457, %415 %459 = call nsz float @llvm.SI.load.const(<16 x i8> %455, i32 612) %460 = fmul nsz float %453, %459 %461 = fadd nsz float %460, %418 %462 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %463 = load <16 x i8>, <16 x i8> addrspace(2)* %462, align 16, !invariant.load !0 %464 = call nsz float @llvm.SI.load.const(<16 x i8> %463, i32 616) %465 = fmul nsz float %453, %464 %466 = fadd nsz float %465, %423 %467 = fmul nsz float %458, %41 %468 = fmul nsz float %461, %42 %469 = fmul nsz float %466, %43 %470 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %471 = load <16 x i8>, <16 x i8> addrspace(2)* %470, align 16, !invariant.load !0 %472 = call nsz float @llvm.SI.load.const(<16 x i8> %471, i32 352) %473 = fadd nsz float %467, %472 %474 = call nsz float @llvm.SI.load.const(<16 x i8> %471, i32 356) %475 = fadd nsz float %468, %474 %476 = call nsz float @llvm.SI.load.const(<16 x i8> %471, i32 360) %477 = fadd nsz float %469, %476 %478 = call nsz float @llvm.minnum.f32(float %473, float 1.000000e+00) #2 %479 = call nsz float @llvm.minnum.f32(float %475, float 1.000000e+00) #2 %480 = call nsz float @llvm.minnum.f32(float %477, float 1.000000e+00) #2 %481 = bitcast i32 %12 to float %482 = insertvalue <{ float, float, float }> undef, float %481, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %478, float %479, float %480, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %35, float %36, float %64, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %110, float %112, float %114, float %118) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %482 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} !1 = !{float 2.500000e+00} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].xy, GENERIC[1] DCL CONST[0..52] DCL TEMP[0..17], LOCAL DCL ADDR[0] IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {0, 1, 2, 3} IMM[2] INT32 {4, 5, 6, 7} IMM[3] INT32 {8, 0, 0, 0} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[0].xyzx 2: MUL TEMP[0], CONST[16], IN[0].xxxx 3: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0] 4: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0] 5: ADD OUT[0], TEMP[0], CONST[19] 6: MOV OUT[1].x, IMM[0].xxxx 7: MOV TEMP[0].w, IMM[0].yyyy 8: MOV TEMP[0].xyz, IN[1].xyzx 9: MUL TEMP[0], CONST[8], IN[1].xxxx 10: MAD TEMP[0], CONST[9], IN[1].yyyy, TEMP[0] 11: MAD TEMP[0], CONST[10], IN[1].zzzz, TEMP[0] 12: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 13: RSQ TEMP[1].x, TEMP[1].xxxx 14: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 15: MOV TEMP[0].xyz, -TEMP[0].xyzx 16: MUL TEMP[1], CONST[8], IN[0].xxxx 17: MAD TEMP[1], CONST[9], IN[0].yyyy, TEMP[1] 18: MAD TEMP[1], CONST[10], IN[0].zzzz, TEMP[1] 19: ADD TEMP[1], TEMP[1], CONST[11] 20: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[21].xyzz 21: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 22: MUL TEMP[3], TEMP[2].xxxx, CONST[29] 23: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[22].xyzz 24: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 25: MAD TEMP[3], TEMP[2].xxxx, CONST[30], TEMP[3] 26: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[23].xyzz 27: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 28: MAD TEMP[3], TEMP[2].xxxx, CONST[31], TEMP[3] 29: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[24].xyzz 30: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 31: MAD TEMP[3], TEMP[2].xxxx, CONST[32], TEMP[3] 32: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[25].xyzz 33: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 34: MAD TEMP[3], TEMP[2].xxxx, CONST[33], TEMP[3] 35: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[26].xyzz 36: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 37: MAD TEMP[3], TEMP[2].xxxx, CONST[34], TEMP[3] 38: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[27].xyzz 39: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 40: MAD TEMP[3], TEMP[2].xxxx, CONST[35], TEMP[3] 41: DP3 TEMP[2].x, TEMP[0].xyzz, CONST[28].xyzz 42: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 43: MAD TEMP[3], TEMP[2].xxxx, CONST[36], TEMP[3] 44: MOV TEMP[2].x, IMM[3].xxxx 45: MOV TEMP[2].x, IMM[1].xxxx 46: BGNLOOP 47: ISGE TEMP[4].x, TEMP[2].xxxx, IMM[3].xxxx 48: UIF TEMP[4].xxxx 49: BRK 50: ENDIF 51: UARL ADDR[0].x, TEMP[2].xxxx 52: MOV TEMP[5], CONST[ADDR[0].x+37] 53: UARL ADDR[0].x, TEMP[2].xxxx 54: MOV TEMP[6], CONST[ADDR[0].x+45] 55: ADD TEMP[7].xyz, TEMP[1].xyzz, -TEMP[5].xyzz 56: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 57: SQRT TEMP[9].x, TEMP[8].xxxx 58: RCP TEMP[10].x, TEMP[9].xxxx 59: MUL TEMP[11].xyz, TEMP[7].xyzz, TEMP[10].xxxx 60: FSLT TEMP[12].x, TEMP[9].xxxx, TEMP[5].wwww 61: UCMP TEMP[13].x, TEMP[12].xxxx, IMM[0].xxxx, TEMP[13].xxxx 62: NOT TEMP[14].x, TEMP[12].xxxx 63: UCMP TEMP[13].x, TEMP[14].xxxx, IMM[0].yyyy, TEMP[13].xxxx 64: DP3 TEMP[15].x, TEMP[0].xyzz, TEMP[11].xyzz 65: MAX TEMP[16].x, IMM[0].yyyy, TEMP[15].xxxx 66: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[13].xxxx 67: MAD TEMP[3], TEMP[17].xxxx, TEMP[6], TEMP[3] 68: UADD TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy 69: ENDLOOP 70: MUL TEMP[3], TEMP[3], IN[3] 71: ADD TEMP[3], TEMP[3], CONST[20] 72: MIN TEMP[0].xyz, IMM[0].xxxx, TEMP[3] 73: MOV TEMP[3].xyz, TEMP[0].xyzx 74: MOV TEMP[3].w, IN[3].wwww 75: MOV OUT[2], TEMP[3] 76: MOV OUT[3].xy, IN[2].xyxx 77: END radeonsi: Compiling shader 20 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %18 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %19 = load <4 x i32>, <4 x i32> addrspace(2)* %18, align 16, !invariant.load !0 %20 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %19, i32 %14, i32 0, i1 false, i1 false) #2 %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %25 = bitcast <16 x i8> addrspace(2)* %24 to <4 x i32> addrspace(2)* %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %15, i32 0, i1 false, i1 false) #2 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %32 = bitcast <16 x i8> addrspace(2)* %31 to <4 x i32> addrspace(2)* %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0 %34 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %33, i32 %16, i32 0, i1 false, i1 false) #2 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %38 = bitcast <16 x i8> addrspace(2)* %37 to <4 x i32> addrspace(2)* %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !invariant.load !0 %40 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %39, i32 %17, i32 0, i1 false, i1 false) #2 %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 256) %48 = fmul nsz float %47, %21 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 260) %50 = fmul nsz float %49, %21 %51 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 264) %52 = fmul nsz float %51, %21 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 268) %56 = fmul nsz float %55, %21 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 272) %58 = fmul nsz float %57, %22 %59 = fadd nsz float %58, %48 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !invariant.load !0 %62 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 276) %63 = fmul nsz float %62, %22 %64 = fadd nsz float %63, %50 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 280) %66 = fmul nsz float %65, %22 %67 = fadd nsz float %66, %52 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !invariant.load !0 %70 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 284) %71 = fmul nsz float %70, %22 %72 = fadd nsz float %71, %56 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 288) %74 = fmul nsz float %73, %23 %75 = fadd nsz float %74, %59 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !invariant.load !0 %78 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 292) %79 = fmul nsz float %78, %23 %80 = fadd nsz float %79, %64 %81 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 296) %82 = fmul nsz float %81, %23 %83 = fadd nsz float %82, %67 %84 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !invariant.load !0 %86 = call nsz float @llvm.SI.load.const(<16 x i8> %85, i32 300) %87 = fmul nsz float %86, %23 %88 = fadd nsz float %87, %72 %89 = call nsz float @llvm.SI.load.const(<16 x i8> %85, i32 304) %90 = fadd nsz float %75, %89 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 308) %94 = fadd nsz float %80, %93 %95 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 312) %96 = fadd nsz float %83, %95 %97 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 316) %98 = fadd nsz float %88, %97 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !invariant.load !0 %101 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 128) %102 = fmul nsz float %101, %28 %103 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 132) %104 = fmul nsz float %103, %28 %105 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 136) %106 = fmul nsz float %105, %28 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !invariant.load !0 %109 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 144) %110 = fmul nsz float %109, %29 %111 = fadd nsz float %110, %102 %112 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 148) %113 = fmul nsz float %112, %29 %114 = fadd nsz float %113, %104 %115 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !invariant.load !0 %117 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 152) %118 = fmul nsz float %117, %29 %119 = fadd nsz float %118, %106 %120 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 160) %121 = fmul nsz float %120, %30 %122 = fadd nsz float %121, %111 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 164) %126 = fmul nsz float %125, %30 %127 = fadd nsz float %126, %114 %128 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 168) %129 = fmul nsz float %128, %30 %130 = fadd nsz float %129, %119 %131 = fmul nsz float %122, %122 %132 = fmul nsz float %127, %127 %133 = fadd nsz float %132, %131 %134 = fmul nsz float %130, %130 %135 = fadd nsz float %133, %134 %136 = call nsz float @llvm.sqrt.f32(float %135) #2 %137 = fdiv nsz float 1.000000e+00, %136, !fpmath !1 %138 = fmul nsz float %122, %137 %139 = fmul nsz float %127, %137 %140 = fmul nsz float %130, %137 %141 = fsub nsz float -0.000000e+00, %138 %142 = fsub nsz float -0.000000e+00, %139 %143 = fsub nsz float -0.000000e+00, %140 %144 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, align 16, !invariant.load !0 %146 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 128) %147 = fmul nsz float %146, %21 %148 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 132) %149 = fmul nsz float %148, %21 %150 = call nsz float @llvm.SI.load.const(<16 x i8> %145, i32 136) %151 = fmul nsz float %150, %21 %152 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, align 16, !invariant.load !0 %154 = call nsz float @llvm.SI.load.const(<16 x i8> %153, i32 144) %155 = fmul nsz float %154, %22 %156 = fadd nsz float %155, %147 %157 = call nsz float @llvm.SI.load.const(<16 x i8> %153, i32 148) %158 = fmul nsz float %157, %22 %159 = fadd nsz float %158, %149 %160 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %161 = load <16 x i8>, <16 x i8> addrspace(2)* %160, align 16, !invariant.load !0 %162 = call nsz float @llvm.SI.load.const(<16 x i8> %161, i32 152) %163 = fmul nsz float %162, %22 %164 = fadd nsz float %163, %151 %165 = call nsz float @llvm.SI.load.const(<16 x i8> %161, i32 160) %166 = fmul nsz float %165, %23 %167 = fadd nsz float %166, %156 %168 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %169 = load <16 x i8>, <16 x i8> addrspace(2)* %168, align 16, !invariant.load !0 %170 = call nsz float @llvm.SI.load.const(<16 x i8> %169, i32 164) %171 = fmul nsz float %170, %23 %172 = fadd nsz float %171, %159 %173 = call nsz float @llvm.SI.load.const(<16 x i8> %169, i32 168) %174 = fmul nsz float %173, %23 %175 = fadd nsz float %174, %164 %176 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %177 = load <16 x i8>, <16 x i8> addrspace(2)* %176, align 16, !invariant.load !0 %178 = call nsz float @llvm.SI.load.const(<16 x i8> %177, i32 176) %179 = fadd nsz float %167, %178 %180 = call nsz float @llvm.SI.load.const(<16 x i8> %177, i32 180) %181 = fadd nsz float %172, %180 %182 = call nsz float @llvm.SI.load.const(<16 x i8> %177, i32 184) %183 = fadd nsz float %175, %182 %184 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %185 = load <16 x i8>, <16 x i8> addrspace(2)* %184, align 16, !invariant.load !0 %186 = call nsz float @llvm.SI.load.const(<16 x i8> %185, i32 336) %187 = call nsz float @llvm.SI.load.const(<16 x i8> %185, i32 340) %188 = call nsz float @llvm.SI.load.const(<16 x i8> %185, i32 344) %189 = fmul nsz float %186, %141 %190 = fmul nsz float %187, %142 %191 = fadd nsz float %190, %189 %192 = fmul nsz float %188, %143 %193 = fadd nsz float %191, %192 %194 = call nsz float @llvm.maxnum.f32(float %193, float 0.000000e+00) #2 %195 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %196 = load <16 x i8>, <16 x i8> addrspace(2)* %195, align 16, !invariant.load !0 %197 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 464) %198 = fmul nsz float %194, %197 %199 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 468) %200 = fmul nsz float %194, %199 %201 = call nsz float @llvm.SI.load.const(<16 x i8> %196, i32 472) %202 = fmul nsz float %194, %201 %203 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %204 = load <16 x i8>, <16 x i8> addrspace(2)* %203, align 16, !invariant.load !0 %205 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 352) %206 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 356) %207 = call nsz float @llvm.SI.load.const(<16 x i8> %204, i32 360) %208 = fmul nsz float %205, %141 %209 = fmul nsz float %206, %142 %210 = fadd nsz float %209, %208 %211 = fmul nsz float %207, %143 %212 = fadd nsz float %210, %211 %213 = call nsz float @llvm.maxnum.f32(float %212, float 0.000000e+00) #2 %214 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %215 = load <16 x i8>, <16 x i8> addrspace(2)* %214, align 16, !invariant.load !0 %216 = call nsz float @llvm.SI.load.const(<16 x i8> %215, i32 480) %217 = fmul nsz float %213, %216 %218 = fadd nsz float %217, %198 %219 = call nsz float @llvm.SI.load.const(<16 x i8> %215, i32 484) %220 = fmul nsz float %213, %219 %221 = fadd nsz float %220, %200 %222 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %223 = load <16 x i8>, <16 x i8> addrspace(2)* %222, align 16, !invariant.load !0 %224 = call nsz float @llvm.SI.load.const(<16 x i8> %223, i32 488) %225 = fmul nsz float %213, %224 %226 = fadd nsz float %225, %202 %227 = call nsz float @llvm.SI.load.const(<16 x i8> %223, i32 368) %228 = call nsz float @llvm.SI.load.const(<16 x i8> %223, i32 372) %229 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %230 = load <16 x i8>, <16 x i8> addrspace(2)* %229, align 16, !invariant.load !0 %231 = call nsz float @llvm.SI.load.const(<16 x i8> %230, i32 376) %232 = fmul nsz float %227, %141 %233 = fmul nsz float %228, %142 %234 = fadd nsz float %233, %232 %235 = fmul nsz float %231, %143 %236 = fadd nsz float %234, %235 %237 = call nsz float @llvm.maxnum.f32(float %236, float 0.000000e+00) #2 %238 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %239 = load <16 x i8>, <16 x i8> addrspace(2)* %238, align 16, !invariant.load !0 %240 = call nsz float @llvm.SI.load.const(<16 x i8> %239, i32 496) %241 = fmul nsz float %237, %240 %242 = fadd nsz float %241, %218 %243 = call nsz float @llvm.SI.load.const(<16 x i8> %239, i32 500) %244 = fmul nsz float %237, %243 %245 = fadd nsz float %244, %221 %246 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %247 = load <16 x i8>, <16 x i8> addrspace(2)* %246, align 16, !invariant.load !0 %248 = call nsz float @llvm.SI.load.const(<16 x i8> %247, i32 504) %249 = fmul nsz float %237, %248 %250 = fadd nsz float %249, %226 %251 = call nsz float @llvm.SI.load.const(<16 x i8> %247, i32 384) %252 = call nsz float @llvm.SI.load.const(<16 x i8> %247, i32 388) %253 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %254 = load <16 x i8>, <16 x i8> addrspace(2)* %253, align 16, !invariant.load !0 %255 = call nsz float @llvm.SI.load.const(<16 x i8> %254, i32 392) %256 = fmul nsz float %251, %141 %257 = fmul nsz float %252, %142 %258 = fadd nsz float %257, %256 %259 = fmul nsz float %255, %143 %260 = fadd nsz float %258, %259 %261 = call nsz float @llvm.maxnum.f32(float %260, float 0.000000e+00) #2 %262 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %263 = load <16 x i8>, <16 x i8> addrspace(2)* %262, align 16, !invariant.load !0 %264 = call nsz float @llvm.SI.load.const(<16 x i8> %263, i32 512) %265 = fmul nsz float %261, %264 %266 = fadd nsz float %265, %242 %267 = call nsz float @llvm.SI.load.const(<16 x i8> %263, i32 516) %268 = fmul nsz float %261, %267 %269 = fadd nsz float %268, %245 %270 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %271 = load <16 x i8>, <16 x i8> addrspace(2)* %270, align 16, !invariant.load !0 %272 = call nsz float @llvm.SI.load.const(<16 x i8> %271, i32 520) %273 = fmul nsz float %261, %272 %274 = fadd nsz float %273, %250 %275 = call nsz float @llvm.SI.load.const(<16 x i8> %271, i32 400) %276 = call nsz float @llvm.SI.load.const(<16 x i8> %271, i32 404) %277 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %278 = load <16 x i8>, <16 x i8> addrspace(2)* %277, align 16, !invariant.load !0 %279 = call nsz float @llvm.SI.load.const(<16 x i8> %278, i32 408) %280 = fmul nsz float %275, %141 %281 = fmul nsz float %276, %142 %282 = fadd nsz float %281, %280 %283 = fmul nsz float %279, %143 %284 = fadd nsz float %282, %283 %285 = call nsz float @llvm.maxnum.f32(float %284, float 0.000000e+00) #2 %286 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %287 = load <16 x i8>, <16 x i8> addrspace(2)* %286, align 16, !invariant.load !0 %288 = call nsz float @llvm.SI.load.const(<16 x i8> %287, i32 528) %289 = fmul nsz float %285, %288 %290 = fadd nsz float %289, %266 %291 = call nsz float @llvm.SI.load.const(<16 x i8> %287, i32 532) %292 = fmul nsz float %285, %291 %293 = fadd nsz float %292, %269 %294 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %295 = load <16 x i8>, <16 x i8> addrspace(2)* %294, align 16, !invariant.load !0 %296 = call nsz float @llvm.SI.load.const(<16 x i8> %295, i32 536) %297 = fmul nsz float %285, %296 %298 = fadd nsz float %297, %274 %299 = call nsz float @llvm.SI.load.const(<16 x i8> %295, i32 416) %300 = call nsz float @llvm.SI.load.const(<16 x i8> %295, i32 420) %301 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %302 = load <16 x i8>, <16 x i8> addrspace(2)* %301, align 16, !invariant.load !0 %303 = call nsz float @llvm.SI.load.const(<16 x i8> %302, i32 424) %304 = fmul nsz float %299, %141 %305 = fmul nsz float %300, %142 %306 = fadd nsz float %305, %304 %307 = fmul nsz float %303, %143 %308 = fadd nsz float %306, %307 %309 = call nsz float @llvm.maxnum.f32(float %308, float 0.000000e+00) #2 %310 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %311 = load <16 x i8>, <16 x i8> addrspace(2)* %310, align 16, !invariant.load !0 %312 = call nsz float @llvm.SI.load.const(<16 x i8> %311, i32 544) %313 = fmul nsz float %309, %312 %314 = fadd nsz float %313, %290 %315 = call nsz float @llvm.SI.load.const(<16 x i8> %311, i32 548) %316 = fmul nsz float %309, %315 %317 = fadd nsz float %316, %293 %318 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %319 = load <16 x i8>, <16 x i8> addrspace(2)* %318, align 16, !invariant.load !0 %320 = call nsz float @llvm.SI.load.const(<16 x i8> %319, i32 552) %321 = fmul nsz float %309, %320 %322 = fadd nsz float %321, %298 %323 = call nsz float @llvm.SI.load.const(<16 x i8> %319, i32 432) %324 = call nsz float @llvm.SI.load.const(<16 x i8> %319, i32 436) %325 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %326 = load <16 x i8>, <16 x i8> addrspace(2)* %325, align 16, !invariant.load !0 %327 = call nsz float @llvm.SI.load.const(<16 x i8> %326, i32 440) %328 = fmul nsz float %323, %141 %329 = fmul nsz float %324, %142 %330 = fadd nsz float %329, %328 %331 = fmul nsz float %327, %143 %332 = fadd nsz float %330, %331 %333 = call nsz float @llvm.maxnum.f32(float %332, float 0.000000e+00) #2 %334 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %335 = load <16 x i8>, <16 x i8> addrspace(2)* %334, align 16, !invariant.load !0 %336 = call nsz float @llvm.SI.load.const(<16 x i8> %335, i32 560) %337 = fmul nsz float %333, %336 %338 = fadd nsz float %337, %314 %339 = call nsz float @llvm.SI.load.const(<16 x i8> %335, i32 564) %340 = fmul nsz float %333, %339 %341 = fadd nsz float %340, %317 %342 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %343 = load <16 x i8>, <16 x i8> addrspace(2)* %342, align 16, !invariant.load !0 %344 = call nsz float @llvm.SI.load.const(<16 x i8> %343, i32 568) %345 = fmul nsz float %333, %344 %346 = fadd nsz float %345, %322 %347 = call nsz float @llvm.SI.load.const(<16 x i8> %343, i32 448) %348 = call nsz float @llvm.SI.load.const(<16 x i8> %343, i32 452) %349 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %350 = load <16 x i8>, <16 x i8> addrspace(2)* %349, align 16, !invariant.load !0 %351 = call nsz float @llvm.SI.load.const(<16 x i8> %350, i32 456) %352 = fmul nsz float %347, %141 %353 = fmul nsz float %348, %142 %354 = fadd nsz float %353, %352 %355 = fmul nsz float %351, %143 %356 = fadd nsz float %354, %355 %357 = call nsz float @llvm.maxnum.f32(float %356, float 0.000000e+00) #2 %358 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %359 = load <16 x i8>, <16 x i8> addrspace(2)* %358, align 16, !invariant.load !0 %360 = call nsz float @llvm.SI.load.const(<16 x i8> %359, i32 576) %361 = fmul nsz float %357, %360 %362 = fadd nsz float %361, %338 %363 = call nsz float @llvm.SI.load.const(<16 x i8> %359, i32 580) %364 = fmul nsz float %357, %363 %365 = fadd nsz float %364, %341 %366 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %367 = load <16 x i8>, <16 x i8> addrspace(2)* %366, align 16 %368 = call nsz float @llvm.SI.load.const(<16 x i8> %367, i32 584) %369 = fmul nsz float %357, %368 %370 = fadd nsz float %369, %346 br label %loop46 loop46: ; preds = %endif50, %main_body %.010 = phi float [ %370, %main_body ], [ %424, %endif50 ] %.09 = phi float [ %365, %main_body ], [ %422, %endif50 ] %.08 = phi float [ %362, %main_body ], [ %420, %endif50 ] %371 = phi i32 [ 0, %main_body ], [ %425, %endif50 ] %372 = phi i32 [ 0, %main_body ], [ %425, %endif50 ] %373 = phi i32 [ 0, %main_body ], [ %425, %endif50 ] %374 = phi i32 [ 0, %main_body ], [ %425, %endif50 ] %375 = icmp sgt i32 %371, 7 br i1 %375, label %endloop69, label %endif50 endif50: ; preds = %loop46 %376 = shl i32 %372, 4 %377 = add i32 %376, 592 %378 = call nsz float @llvm.SI.load.const(<16 x i8> %367, i32 %377) %379 = shl i32 %372, 4 %380 = add i32 %379, 596 %381 = call nsz float @llvm.SI.load.const(<16 x i8> %367, i32 %380) %382 = shl i32 %372, 4 %383 = add i32 %382, 600 %384 = call nsz float @llvm.SI.load.const(<16 x i8> %367, i32 %383) %385 = shl i32 %372, 4 %386 = add i32 %385, 604 %387 = call nsz float @llvm.SI.load.const(<16 x i8> %367, i32 %386) %388 = shl i32 %373, 4 %389 = add i32 %388, 720 %390 = call nsz float @llvm.SI.load.const(<16 x i8> %367, i32 %389) %391 = shl i32 %373, 4 %392 = add i32 %391, 724 %393 = call nsz float @llvm.SI.load.const(<16 x i8> %367, i32 %392) %394 = shl i32 %373, 4 %395 = add i32 %394, 728 %396 = call nsz float @llvm.SI.load.const(<16 x i8> %367, i32 %395) %397 = fsub nsz float %179, %378 %398 = fsub nsz float %181, %381 %399 = fsub nsz float %183, %384 %400 = fmul nsz float %397, %397 %401 = fmul nsz float %398, %398 %402 = fadd nsz float %401, %400 %403 = fmul nsz float %399, %399 %404 = fadd nsz float %402, %403 %405 = call nsz float @llvm.sqrt.f32(float %404) #2 %406 = fdiv nsz float 1.000000e+00, %405, !fpmath !1 %407 = fmul nsz float %397, %406 %408 = fmul nsz float %398, %406 %409 = fmul nsz float %399, %406 %410 = fcmp nsz olt float %405, %387 %411 = select i1 %410, float 1.000000e+00, float 0.000000e+00 %412 = fmul nsz float %407, %141 %413 = fmul nsz float %408, %142 %414 = fadd nsz float %413, %412 %415 = fmul nsz float %409, %143 %416 = fadd nsz float %414, %415 %417 = call nsz float @llvm.maxnum.f32(float %416, float 0.000000e+00) #2 %418 = fmul nsz float %417, %411 %419 = fmul nsz float %418, %390 %420 = fadd nsz float %419, %.08 %421 = fmul nsz float %418, %393 %422 = fadd nsz float %421, %.09 %423 = fmul nsz float %418, %396 %424 = fadd nsz float %423, %.010 %425 = add i32 %374, 1 br label %loop46 endloop69: ; preds = %loop46 %426 = fmul nsz float %.08, %41 %427 = fmul nsz float %.09, %42 %428 = fmul nsz float %.010, %43 %429 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %430 = load <16 x i8>, <16 x i8> addrspace(2)* %429, align 16, !invariant.load !0 %431 = call nsz float @llvm.SI.load.const(<16 x i8> %430, i32 320) %432 = fadd nsz float %426, %431 %433 = call nsz float @llvm.SI.load.const(<16 x i8> %430, i32 324) %434 = fadd nsz float %427, %433 %435 = call nsz float @llvm.SI.load.const(<16 x i8> %430, i32 328) %436 = fadd nsz float %428, %435 %437 = call nsz float @llvm.minnum.f32(float %432, float 1.000000e+00) #2 %438 = call nsz float @llvm.minnum.f32(float %434, float 1.000000e+00) #2 %439 = call nsz float @llvm.minnum.f32(float %436, float 1.000000e+00) #2 %440 = bitcast i32 %12 to float %441 = insertvalue <{ float, float, float }> undef, float %440, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %437, float %438, float %439, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %35, float %36, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %90, float %94, float %96, float %98) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %441 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} !1 = !{float 2.500000e+00} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].xyz, GENERIC[1] DCL CONST[0..54] DCL TEMP[0..18], LOCAL DCL ADDR[0] IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {0, 1, 2, 3} IMM[2] INT32 {4, 5, 6, 7} IMM[3] INT32 {8, 0, 0, 0} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[0].xyzx 2: MUL TEMP[0], CONST[12], IN[0].xxxx 3: MAD TEMP[0], CONST[13], IN[0].yyyy, TEMP[0] 4: MAD TEMP[0], CONST[14], IN[0].zzzz, TEMP[0] 5: ADD TEMP[0], TEMP[0], CONST[15] 6: ADD TEMP[0].x, TEMP[0].zzzz, -CONST[20].xxxx 7: MUL TEMP[0].x, TEMP[0].xxxx, CONST[21].xxxx 8: MUL TEMP[1], CONST[16], IN[0].xxxx 9: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 10: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 11: ADD OUT[0], TEMP[1], CONST[19] 12: MOV OUT[1].x, IMM[0].xxxx 13: MOV TEMP[1].w, IMM[0].yyyy 14: MOV TEMP[1].xyz, IN[1].xyzx 15: MUL TEMP[1], CONST[8], IN[1].xxxx 16: MAD TEMP[1], CONST[9], IN[1].yyyy, TEMP[1] 17: MAD TEMP[1], CONST[10], IN[1].zzzz, TEMP[1] 18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 19: RSQ TEMP[2].x, TEMP[2].xxxx 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 21: MOV TEMP[1].xyz, -TEMP[1].xyzx 22: MUL TEMP[2], CONST[8], IN[0].xxxx 23: MAD TEMP[2], CONST[9], IN[0].yyyy, TEMP[2] 24: MAD TEMP[2], CONST[10], IN[0].zzzz, TEMP[2] 25: ADD TEMP[2], TEMP[2], CONST[11] 26: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[23].xyzz 27: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 28: MUL TEMP[4], TEMP[3].xxxx, CONST[31] 29: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[24].xyzz 30: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 31: MAD TEMP[4], TEMP[3].xxxx, CONST[32], TEMP[4] 32: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[25].xyzz 33: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 34: MAD TEMP[4], TEMP[3].xxxx, CONST[33], TEMP[4] 35: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[26].xyzz 36: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 37: MAD TEMP[4], TEMP[3].xxxx, CONST[34], TEMP[4] 38: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[27].xyzz 39: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 40: MAD TEMP[4], TEMP[3].xxxx, CONST[35], TEMP[4] 41: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[28].xyzz 42: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 43: MAD TEMP[4], TEMP[3].xxxx, CONST[36], TEMP[4] 44: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[29].xyzz 45: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 46: MAD TEMP[4], TEMP[3].xxxx, CONST[37], TEMP[4] 47: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[30].xyzz 48: MAX TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 49: MAD TEMP[4], TEMP[3].xxxx, CONST[38], TEMP[4] 50: MOV TEMP[3].x, IMM[3].xxxx 51: MOV TEMP[3].x, IMM[1].xxxx 52: BGNLOOP 53: ISGE TEMP[5].x, TEMP[3].xxxx, IMM[3].xxxx 54: UIF TEMP[5].xxxx 55: BRK 56: ENDIF 57: UARL ADDR[0].x, TEMP[3].xxxx 58: MOV TEMP[6], CONST[ADDR[0].x+39] 59: UARL ADDR[0].x, TEMP[3].xxxx 60: MOV TEMP[7], CONST[ADDR[0].x+47] 61: ADD TEMP[8].xyz, TEMP[2].xyzz, -TEMP[6].xyzz 62: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz 63: SQRT TEMP[10].x, TEMP[9].xxxx 64: RCP TEMP[11].x, TEMP[10].xxxx 65: MUL TEMP[12].xyz, TEMP[8].xyzz, TEMP[11].xxxx 66: FSLT TEMP[13].x, TEMP[10].xxxx, TEMP[6].wwww 67: UCMP TEMP[14].x, TEMP[13].xxxx, IMM[0].xxxx, TEMP[14].xxxx 68: NOT TEMP[15].x, TEMP[13].xxxx 69: UCMP TEMP[14].x, TEMP[15].xxxx, IMM[0].yyyy, TEMP[14].xxxx 70: DP3 TEMP[16].x, TEMP[1].xyzz, TEMP[12].xyzz 71: MAX TEMP[17].x, IMM[0].yyyy, TEMP[16].xxxx 72: MUL TEMP[18].x, TEMP[17].xxxx, TEMP[14].xxxx 73: MAD TEMP[4], TEMP[18].xxxx, TEMP[7], TEMP[4] 74: UADD TEMP[3].x, TEMP[3].xxxx, IMM[1].yyyy 75: ENDLOOP 76: MUL TEMP[4], TEMP[4], IN[3] 77: ADD TEMP[4], TEMP[4], CONST[22] 78: MIN TEMP[1].xyz, IMM[0].xxxx, TEMP[4] 79: MOV TEMP[4].xyz, TEMP[1].xyzx 80: MOV TEMP[4].w, IN[3].wwww 81: MOV OUT[2], TEMP[4] 82: MOV OUT[3].z, TEMP[0].xxxx 83: MOV OUT[3].xy, IN[2].xyxx 84: END radeonsi: Compiling shader 21 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %18 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %19 = load <4 x i32>, <4 x i32> addrspace(2)* %18, align 16, !invariant.load !0 %20 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %19, i32 %14, i32 0, i1 false, i1 false) #2 %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %25 = bitcast <16 x i8> addrspace(2)* %24 to <4 x i32> addrspace(2)* %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %15, i32 0, i1 false, i1 false) #2 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %32 = bitcast <16 x i8> addrspace(2)* %31 to <4 x i32> addrspace(2)* %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0 %34 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %33, i32 %16, i32 0, i1 false, i1 false) #2 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %38 = bitcast <16 x i8> addrspace(2)* %37 to <4 x i32> addrspace(2)* %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !invariant.load !0 %40 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %39, i32 %17, i32 0, i1 false, i1 false) #2 %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 200) %48 = fmul nsz float %47, %21 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 216) %50 = fmul nsz float %49, %22 %51 = fadd nsz float %50, %48 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !invariant.load !0 %54 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 232) %55 = fmul nsz float %54, %23 %56 = fadd nsz float %55, %51 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 248) %58 = fadd nsz float %56, %57 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !invariant.load !0 %61 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 320) %62 = fsub nsz float %58, %61 %63 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 336) %64 = fmul nsz float %62, %63 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 256) %66 = fmul nsz float %65, %21 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !invariant.load !0 %69 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 260) %70 = fmul nsz float %69, %21 %71 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 264) %72 = fmul nsz float %71, %21 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %68, i32 268) %74 = fmul nsz float %73, %21 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !invariant.load !0 %77 = call nsz float @llvm.SI.load.const(<16 x i8> %76, i32 272) %78 = fmul nsz float %77, %22 %79 = fadd nsz float %78, %66 %80 = call nsz float @llvm.SI.load.const(<16 x i8> %76, i32 276) %81 = fmul nsz float %80, %22 %82 = fadd nsz float %81, %70 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !invariant.load !0 %85 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 280) %86 = fmul nsz float %85, %22 %87 = fadd nsz float %86, %72 %88 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 284) %89 = fmul nsz float %88, %22 %90 = fadd nsz float %89, %74 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 288) %94 = fmul nsz float %93, %23 %95 = fadd nsz float %94, %79 %96 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 292) %97 = fmul nsz float %96, %23 %98 = fadd nsz float %97, %82 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !invariant.load !0 %101 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 296) %102 = fmul nsz float %101, %23 %103 = fadd nsz float %102, %87 %104 = call nsz float @llvm.SI.load.const(<16 x i8> %100, i32 300) %105 = fmul nsz float %104, %23 %106 = fadd nsz float %105, %90 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !invariant.load !0 %109 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 304) %110 = fadd nsz float %95, %109 %111 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 308) %112 = fadd nsz float %98, %111 %113 = call nsz float @llvm.SI.load.const(<16 x i8> %108, i32 312) %114 = fadd nsz float %103, %113 %115 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !invariant.load !0 %117 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 316) %118 = fadd nsz float %106, %117 %119 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 128) %120 = fmul nsz float %119, %28 %121 = call nsz float @llvm.SI.load.const(<16 x i8> %116, i32 132) %122 = fmul nsz float %121, %28 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 136) %126 = fmul nsz float %125, %28 %127 = call nsz float @llvm.SI.load.const(<16 x i8> %124, i32 144) %128 = fmul nsz float %127, %29 %129 = fadd nsz float %128, %120 %130 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %131 = load <16 x i8>, <16 x i8> addrspace(2)* %130, align 16, !invariant.load !0 %132 = call nsz float @llvm.SI.load.const(<16 x i8> %131, i32 148) %133 = fmul nsz float %132, %29 %134 = fadd nsz float %133, %122 %135 = call nsz float @llvm.SI.load.const(<16 x i8> %131, i32 152) %136 = fmul nsz float %135, %29 %137 = fadd nsz float %136, %126 %138 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %139 = load <16 x i8>, <16 x i8> addrspace(2)* %138, align 16, !invariant.load !0 %140 = call nsz float @llvm.SI.load.const(<16 x i8> %139, i32 160) %141 = fmul nsz float %140, %30 %142 = fadd nsz float %141, %129 %143 = call nsz float @llvm.SI.load.const(<16 x i8> %139, i32 164) %144 = fmul nsz float %143, %30 %145 = fadd nsz float %144, %134 %146 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %147 = load <16 x i8>, <16 x i8> addrspace(2)* %146, align 16, !invariant.load !0 %148 = call nsz float @llvm.SI.load.const(<16 x i8> %147, i32 168) %149 = fmul nsz float %148, %30 %150 = fadd nsz float %149, %137 %151 = fmul nsz float %142, %142 %152 = fmul nsz float %145, %145 %153 = fadd nsz float %152, %151 %154 = fmul nsz float %150, %150 %155 = fadd nsz float %153, %154 %156 = call nsz float @llvm.sqrt.f32(float %155) #2 %157 = fdiv nsz float 1.000000e+00, %156, !fpmath !1 %158 = fmul nsz float %142, %157 %159 = fmul nsz float %145, %157 %160 = fmul nsz float %150, %157 %161 = fsub nsz float -0.000000e+00, %158 %162 = fsub nsz float -0.000000e+00, %159 %163 = fsub nsz float -0.000000e+00, %160 %164 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %165 = load <16 x i8>, <16 x i8> addrspace(2)* %164, align 16, !invariant.load !0 %166 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 128) %167 = fmul nsz float %166, %21 %168 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 132) %169 = fmul nsz float %168, %21 %170 = call nsz float @llvm.SI.load.const(<16 x i8> %165, i32 136) %171 = fmul nsz float %170, %21 %172 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %173 = load <16 x i8>, <16 x i8> addrspace(2)* %172, align 16, !invariant.load !0 %174 = call nsz float @llvm.SI.load.const(<16 x i8> %173, i32 144) %175 = fmul nsz float %174, %22 %176 = fadd nsz float %175, %167 %177 = call nsz float @llvm.SI.load.const(<16 x i8> %173, i32 148) %178 = fmul nsz float %177, %22 %179 = fadd nsz float %178, %169 %180 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %181 = load <16 x i8>, <16 x i8> addrspace(2)* %180, align 16, !invariant.load !0 %182 = call nsz float @llvm.SI.load.const(<16 x i8> %181, i32 152) %183 = fmul nsz float %182, %22 %184 = fadd nsz float %183, %171 %185 = call nsz float @llvm.SI.load.const(<16 x i8> %181, i32 160) %186 = fmul nsz float %185, %23 %187 = fadd nsz float %186, %176 %188 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %189 = load <16 x i8>, <16 x i8> addrspace(2)* %188, align 16, !invariant.load !0 %190 = call nsz float @llvm.SI.load.const(<16 x i8> %189, i32 164) %191 = fmul nsz float %190, %23 %192 = fadd nsz float %191, %179 %193 = call nsz float @llvm.SI.load.const(<16 x i8> %189, i32 168) %194 = fmul nsz float %193, %23 %195 = fadd nsz float %194, %184 %196 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %197 = load <16 x i8>, <16 x i8> addrspace(2)* %196, align 16, !invariant.load !0 %198 = call nsz float @llvm.SI.load.const(<16 x i8> %197, i32 176) %199 = fadd nsz float %187, %198 %200 = call nsz float @llvm.SI.load.const(<16 x i8> %197, i32 180) %201 = fadd nsz float %192, %200 %202 = call nsz float @llvm.SI.load.const(<16 x i8> %197, i32 184) %203 = fadd nsz float %195, %202 %204 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %205 = load <16 x i8>, <16 x i8> addrspace(2)* %204, align 16, !invariant.load !0 %206 = call nsz float @llvm.SI.load.const(<16 x i8> %205, i32 368) %207 = call nsz float @llvm.SI.load.const(<16 x i8> %205, i32 372) %208 = call nsz float @llvm.SI.load.const(<16 x i8> %205, i32 376) %209 = fmul nsz float %206, %161 %210 = fmul nsz float %207, %162 %211 = fadd nsz float %210, %209 %212 = fmul nsz float %208, %163 %213 = fadd nsz float %211, %212 %214 = call nsz float @llvm.maxnum.f32(float %213, float 0.000000e+00) #2 %215 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %216 = load <16 x i8>, <16 x i8> addrspace(2)* %215, align 16, !invariant.load !0 %217 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 496) %218 = fmul nsz float %214, %217 %219 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 500) %220 = fmul nsz float %214, %219 %221 = call nsz float @llvm.SI.load.const(<16 x i8> %216, i32 504) %222 = fmul nsz float %214, %221 %223 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %224 = load <16 x i8>, <16 x i8> addrspace(2)* %223, align 16, !invariant.load !0 %225 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 384) %226 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 388) %227 = call nsz float @llvm.SI.load.const(<16 x i8> %224, i32 392) %228 = fmul nsz float %225, %161 %229 = fmul nsz float %226, %162 %230 = fadd nsz float %229, %228 %231 = fmul nsz float %227, %163 %232 = fadd nsz float %230, %231 %233 = call nsz float @llvm.maxnum.f32(float %232, float 0.000000e+00) #2 %234 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %235 = load <16 x i8>, <16 x i8> addrspace(2)* %234, align 16, !invariant.load !0 %236 = call nsz float @llvm.SI.load.const(<16 x i8> %235, i32 512) %237 = fmul nsz float %233, %236 %238 = fadd nsz float %237, %218 %239 = call nsz float @llvm.SI.load.const(<16 x i8> %235, i32 516) %240 = fmul nsz float %233, %239 %241 = fadd nsz float %240, %220 %242 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %243 = load <16 x i8>, <16 x i8> addrspace(2)* %242, align 16, !invariant.load !0 %244 = call nsz float @llvm.SI.load.const(<16 x i8> %243, i32 520) %245 = fmul nsz float %233, %244 %246 = fadd nsz float %245, %222 %247 = call nsz float @llvm.SI.load.const(<16 x i8> %243, i32 400) %248 = call nsz float @llvm.SI.load.const(<16 x i8> %243, i32 404) %249 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %250 = load <16 x i8>, <16 x i8> addrspace(2)* %249, align 16, !invariant.load !0 %251 = call nsz float @llvm.SI.load.const(<16 x i8> %250, i32 408) %252 = fmul nsz float %247, %161 %253 = fmul nsz float %248, %162 %254 = fadd nsz float %253, %252 %255 = fmul nsz float %251, %163 %256 = fadd nsz float %254, %255 %257 = call nsz float @llvm.maxnum.f32(float %256, float 0.000000e+00) #2 %258 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %259 = load <16 x i8>, <16 x i8> addrspace(2)* %258, align 16, !invariant.load !0 %260 = call nsz float @llvm.SI.load.const(<16 x i8> %259, i32 528) %261 = fmul nsz float %257, %260 %262 = fadd nsz float %261, %238 %263 = call nsz float @llvm.SI.load.const(<16 x i8> %259, i32 532) %264 = fmul nsz float %257, %263 %265 = fadd nsz float %264, %241 %266 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %267 = load <16 x i8>, <16 x i8> addrspace(2)* %266, align 16, !invariant.load !0 %268 = call nsz float @llvm.SI.load.const(<16 x i8> %267, i32 536) %269 = fmul nsz float %257, %268 %270 = fadd nsz float %269, %246 %271 = call nsz float @llvm.SI.load.const(<16 x i8> %267, i32 416) %272 = call nsz float @llvm.SI.load.const(<16 x i8> %267, i32 420) %273 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %274 = load <16 x i8>, <16 x i8> addrspace(2)* %273, align 16, !invariant.load !0 %275 = call nsz float @llvm.SI.load.const(<16 x i8> %274, i32 424) %276 = fmul nsz float %271, %161 %277 = fmul nsz float %272, %162 %278 = fadd nsz float %277, %276 %279 = fmul nsz float %275, %163 %280 = fadd nsz float %278, %279 %281 = call nsz float @llvm.maxnum.f32(float %280, float 0.000000e+00) #2 %282 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %283 = load <16 x i8>, <16 x i8> addrspace(2)* %282, align 16, !invariant.load !0 %284 = call nsz float @llvm.SI.load.const(<16 x i8> %283, i32 544) %285 = fmul nsz float %281, %284 %286 = fadd nsz float %285, %262 %287 = call nsz float @llvm.SI.load.const(<16 x i8> %283, i32 548) %288 = fmul nsz float %281, %287 %289 = fadd nsz float %288, %265 %290 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %291 = load <16 x i8>, <16 x i8> addrspace(2)* %290, align 16, !invariant.load !0 %292 = call nsz float @llvm.SI.load.const(<16 x i8> %291, i32 552) %293 = fmul nsz float %281, %292 %294 = fadd nsz float %293, %270 %295 = call nsz float @llvm.SI.load.const(<16 x i8> %291, i32 432) %296 = call nsz float @llvm.SI.load.const(<16 x i8> %291, i32 436) %297 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %298 = load <16 x i8>, <16 x i8> addrspace(2)* %297, align 16, !invariant.load !0 %299 = call nsz float @llvm.SI.load.const(<16 x i8> %298, i32 440) %300 = fmul nsz float %295, %161 %301 = fmul nsz float %296, %162 %302 = fadd nsz float %301, %300 %303 = fmul nsz float %299, %163 %304 = fadd nsz float %302, %303 %305 = call nsz float @llvm.maxnum.f32(float %304, float 0.000000e+00) #2 %306 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %307 = load <16 x i8>, <16 x i8> addrspace(2)* %306, align 16, !invariant.load !0 %308 = call nsz float @llvm.SI.load.const(<16 x i8> %307, i32 560) %309 = fmul nsz float %305, %308 %310 = fadd nsz float %309, %286 %311 = call nsz float @llvm.SI.load.const(<16 x i8> %307, i32 564) %312 = fmul nsz float %305, %311 %313 = fadd nsz float %312, %289 %314 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %315 = load <16 x i8>, <16 x i8> addrspace(2)* %314, align 16, !invariant.load !0 %316 = call nsz float @llvm.SI.load.const(<16 x i8> %315, i32 568) %317 = fmul nsz float %305, %316 %318 = fadd nsz float %317, %294 %319 = call nsz float @llvm.SI.load.const(<16 x i8> %315, i32 448) %320 = call nsz float @llvm.SI.load.const(<16 x i8> %315, i32 452) %321 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %322 = load <16 x i8>, <16 x i8> addrspace(2)* %321, align 16, !invariant.load !0 %323 = call nsz float @llvm.SI.load.const(<16 x i8> %322, i32 456) %324 = fmul nsz float %319, %161 %325 = fmul nsz float %320, %162 %326 = fadd nsz float %325, %324 %327 = fmul nsz float %323, %163 %328 = fadd nsz float %326, %327 %329 = call nsz float @llvm.maxnum.f32(float %328, float 0.000000e+00) #2 %330 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %331 = load <16 x i8>, <16 x i8> addrspace(2)* %330, align 16, !invariant.load !0 %332 = call nsz float @llvm.SI.load.const(<16 x i8> %331, i32 576) %333 = fmul nsz float %329, %332 %334 = fadd nsz float %333, %310 %335 = call nsz float @llvm.SI.load.const(<16 x i8> %331, i32 580) %336 = fmul nsz float %329, %335 %337 = fadd nsz float %336, %313 %338 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %339 = load <16 x i8>, <16 x i8> addrspace(2)* %338, align 16, !invariant.load !0 %340 = call nsz float @llvm.SI.load.const(<16 x i8> %339, i32 584) %341 = fmul nsz float %329, %340 %342 = fadd nsz float %341, %318 %343 = call nsz float @llvm.SI.load.const(<16 x i8> %339, i32 464) %344 = call nsz float @llvm.SI.load.const(<16 x i8> %339, i32 468) %345 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %346 = load <16 x i8>, <16 x i8> addrspace(2)* %345, align 16, !invariant.load !0 %347 = call nsz float @llvm.SI.load.const(<16 x i8> %346, i32 472) %348 = fmul nsz float %343, %161 %349 = fmul nsz float %344, %162 %350 = fadd nsz float %349, %348 %351 = fmul nsz float %347, %163 %352 = fadd nsz float %350, %351 %353 = call nsz float @llvm.maxnum.f32(float %352, float 0.000000e+00) #2 %354 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %355 = load <16 x i8>, <16 x i8> addrspace(2)* %354, align 16, !invariant.load !0 %356 = call nsz float @llvm.SI.load.const(<16 x i8> %355, i32 592) %357 = fmul nsz float %353, %356 %358 = fadd nsz float %357, %334 %359 = call nsz float @llvm.SI.load.const(<16 x i8> %355, i32 596) %360 = fmul nsz float %353, %359 %361 = fadd nsz float %360, %337 %362 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %363 = load <16 x i8>, <16 x i8> addrspace(2)* %362, align 16, !invariant.load !0 %364 = call nsz float @llvm.SI.load.const(<16 x i8> %363, i32 600) %365 = fmul nsz float %353, %364 %366 = fadd nsz float %365, %342 %367 = call nsz float @llvm.SI.load.const(<16 x i8> %363, i32 480) %368 = call nsz float @llvm.SI.load.const(<16 x i8> %363, i32 484) %369 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %370 = load <16 x i8>, <16 x i8> addrspace(2)* %369, align 16, !invariant.load !0 %371 = call nsz float @llvm.SI.load.const(<16 x i8> %370, i32 488) %372 = fmul nsz float %367, %161 %373 = fmul nsz float %368, %162 %374 = fadd nsz float %373, %372 %375 = fmul nsz float %371, %163 %376 = fadd nsz float %374, %375 %377 = call nsz float @llvm.maxnum.f32(float %376, float 0.000000e+00) #2 %378 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %379 = load <16 x i8>, <16 x i8> addrspace(2)* %378, align 16, !invariant.load !0 %380 = call nsz float @llvm.SI.load.const(<16 x i8> %379, i32 608) %381 = fmul nsz float %377, %380 %382 = fadd nsz float %381, %358 %383 = call nsz float @llvm.SI.load.const(<16 x i8> %379, i32 612) %384 = fmul nsz float %377, %383 %385 = fadd nsz float %384, %361 %386 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %387 = load <16 x i8>, <16 x i8> addrspace(2)* %386, align 16 %388 = call nsz float @llvm.SI.load.const(<16 x i8> %387, i32 616) %389 = fmul nsz float %377, %388 %390 = fadd nsz float %389, %366 br label %loop52 loop52: ; preds = %endif56, %main_body %.010 = phi float [ %390, %main_body ], [ %444, %endif56 ] %.09 = phi float [ %385, %main_body ], [ %442, %endif56 ] %.08 = phi float [ %382, %main_body ], [ %440, %endif56 ] %391 = phi i32 [ 0, %main_body ], [ %445, %endif56 ] %392 = phi i32 [ 0, %main_body ], [ %445, %endif56 ] %393 = phi i32 [ 0, %main_body ], [ %445, %endif56 ] %394 = phi i32 [ 0, %main_body ], [ %445, %endif56 ] %395 = icmp sgt i32 %391, 7 br i1 %395, label %endloop75, label %endif56 endif56: ; preds = %loop52 %396 = shl i32 %392, 4 %397 = add i32 %396, 624 %398 = call nsz float @llvm.SI.load.const(<16 x i8> %387, i32 %397) %399 = shl i32 %392, 4 %400 = add i32 %399, 628 %401 = call nsz float @llvm.SI.load.const(<16 x i8> %387, i32 %400) %402 = shl i32 %392, 4 %403 = add i32 %402, 632 %404 = call nsz float @llvm.SI.load.const(<16 x i8> %387, i32 %403) %405 = shl i32 %392, 4 %406 = add i32 %405, 636 %407 = call nsz float @llvm.SI.load.const(<16 x i8> %387, i32 %406) %408 = shl i32 %393, 4 %409 = add i32 %408, 752 %410 = call nsz float @llvm.SI.load.const(<16 x i8> %387, i32 %409) %411 = shl i32 %393, 4 %412 = add i32 %411, 756 %413 = call nsz float @llvm.SI.load.const(<16 x i8> %387, i32 %412) %414 = shl i32 %393, 4 %415 = add i32 %414, 760 %416 = call nsz float @llvm.SI.load.const(<16 x i8> %387, i32 %415) %417 = fsub nsz float %199, %398 %418 = fsub nsz float %201, %401 %419 = fsub nsz float %203, %404 %420 = fmul nsz float %417, %417 %421 = fmul nsz float %418, %418 %422 = fadd nsz float %421, %420 %423 = fmul nsz float %419, %419 %424 = fadd nsz float %422, %423 %425 = call nsz float @llvm.sqrt.f32(float %424) #2 %426 = fdiv nsz float 1.000000e+00, %425, !fpmath !1 %427 = fmul nsz float %417, %426 %428 = fmul nsz float %418, %426 %429 = fmul nsz float %419, %426 %430 = fcmp nsz olt float %425, %407 %431 = select i1 %430, float 1.000000e+00, float 0.000000e+00 %432 = fmul nsz float %427, %161 %433 = fmul nsz float %428, %162 %434 = fadd nsz float %433, %432 %435 = fmul nsz float %429, %163 %436 = fadd nsz float %434, %435 %437 = call nsz float @llvm.maxnum.f32(float %436, float 0.000000e+00) #2 %438 = fmul nsz float %437, %431 %439 = fmul nsz float %438, %410 %440 = fadd nsz float %439, %.08 %441 = fmul nsz float %438, %413 %442 = fadd nsz float %441, %.09 %443 = fmul nsz float %438, %416 %444 = fadd nsz float %443, %.010 %445 = add i32 %394, 1 br label %loop52 endloop75: ; preds = %loop52 %446 = fmul nsz float %.08, %41 %447 = fmul nsz float %.09, %42 %448 = fmul nsz float %.010, %43 %449 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %450 = load <16 x i8>, <16 x i8> addrspace(2)* %449, align 16, !invariant.load !0 %451 = call nsz float @llvm.SI.load.const(<16 x i8> %450, i32 352) %452 = fadd nsz float %446, %451 %453 = call nsz float @llvm.SI.load.const(<16 x i8> %450, i32 356) %454 = fadd nsz float %447, %453 %455 = call nsz float @llvm.SI.load.const(<16 x i8> %450, i32 360) %456 = fadd nsz float %448, %455 %457 = call nsz float @llvm.minnum.f32(float %452, float 1.000000e+00) #2 %458 = call nsz float @llvm.minnum.f32(float %454, float 1.000000e+00) #2 %459 = call nsz float @llvm.minnum.f32(float %456, float 1.000000e+00) #2 %460 = bitcast i32 %12 to float %461 = insertvalue <{ float, float, float }> undef, float %460, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %457, float %458, float %459, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %35, float %36, float %64, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %110, float %112, float %114, float %118) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %461 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} !1 = !{float 2.500000e+00} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL CONST[0..19] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[16], IN[0].xxxx 1: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0] 3: ADD OUT[0], TEMP[0], CONST[19] 4: MOV OUT[1].x, IMM[0].xxxx 5: MOV OUT[2], IN[1] 6: END radeonsi: Compiling shader 22 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) #0 { main_body: %16 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %17 = load <4 x i32>, <4 x i32> addrspace(2)* %16, align 16, !invariant.load !0 %18 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %17, i32 %14, i32 0, i1 false, i1 false) #2 %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %23 = bitcast <16 x i8> addrspace(2)* %22 to <4 x i32> addrspace(2)* %24 = load <4 x i32>, <4 x i32> addrspace(2)* %23, align 16, !invariant.load !0 %25 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %24, i32 %15, i32 0, i1 false, i1 false) #2 %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !invariant.load !0 %32 = call nsz float @llvm.SI.load.const(<16 x i8> %31, i32 256) %33 = fmul nsz float %32, %19 %34 = call nsz float @llvm.SI.load.const(<16 x i8> %31, i32 260) %35 = fmul nsz float %34, %19 %36 = call nsz float @llvm.SI.load.const(<16 x i8> %31, i32 264) %37 = fmul nsz float %36, %19 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !invariant.load !0 %40 = call nsz float @llvm.SI.load.const(<16 x i8> %39, i32 268) %41 = fmul nsz float %40, %19 %42 = call nsz float @llvm.SI.load.const(<16 x i8> %39, i32 272) %43 = fmul nsz float %42, %20 %44 = fadd nsz float %43, %33 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 276) %48 = fmul nsz float %47, %20 %49 = fadd nsz float %48, %35 %50 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 280) %51 = fmul nsz float %50, %20 %52 = fadd nsz float %51, %37 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 284) %56 = fmul nsz float %55, %20 %57 = fadd nsz float %56, %41 %58 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 288) %59 = fmul nsz float %58, %21 %60 = fadd nsz float %59, %44 %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !invariant.load !0 %63 = call nsz float @llvm.SI.load.const(<16 x i8> %62, i32 292) %64 = fmul nsz float %63, %21 %65 = fadd nsz float %64, %49 %66 = call nsz float @llvm.SI.load.const(<16 x i8> %62, i32 296) %67 = fmul nsz float %66, %21 %68 = fadd nsz float %67, %52 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !invariant.load !0 %71 = call nsz float @llvm.SI.load.const(<16 x i8> %70, i32 300) %72 = fmul nsz float %71, %21 %73 = fadd nsz float %72, %57 %74 = call nsz float @llvm.SI.load.const(<16 x i8> %70, i32 304) %75 = fadd nsz float %60, %74 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !invariant.load !0 %78 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 308) %79 = fadd nsz float %65, %78 %80 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 312) %81 = fadd nsz float %68, %80 %82 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 316) %83 = fadd nsz float %73, %82 %84 = bitcast i32 %12 to float %85 = insertvalue <{ float, float, float }> undef, float %84, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %26, float %27, float %28, float %29) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %75, float %79, float %81, float %83) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %85 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 23 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = bitcast <2 x i32> %8 to <2 x float> %24 = extractelement <2 x float> %23, i32 0 %25 = extractelement <2 x float> %23, i32 1 %26 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 0, i32 0, i32 %6) #1 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %25, i32 0, i32 0, i32 %6) #1 %28 = bitcast <2 x i32> %8 to <2 x float> %29 = extractelement <2 x float> %28, i32 0 %30 = extractelement <2 x float> %28, i32 1 %31 = call nsz float @llvm.amdgcn.interp.p1(float %29, i32 1, i32 0, i32 %6) #1 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %30, i32 1, i32 0, i32 %6) #1 %33 = bitcast <2 x i32> %8 to <2 x float> %34 = extractelement <2 x float> %33, i32 0 %35 = extractelement <2 x float> %33, i32 1 %36 = call nsz float @llvm.amdgcn.interp.p1(float %34, i32 2, i32 0, i32 %6) #1 %37 = call nsz float @llvm.amdgcn.interp.p2(float %36, float %35, i32 2, i32 0, i32 %6) #1 %38 = bitcast <2 x i32> %8 to <2 x float> %39 = extractelement <2 x float> %38, i32 0 %40 = extractelement <2 x float> %38, i32 1 %41 = call nsz float @llvm.amdgcn.interp.p1(float %39, i32 3, i32 0, i32 %6) #1 %42 = call nsz float @llvm.amdgcn.interp.p2(float %41, float %40, i32 3, i32 0, i32 %6) #1 %43 = bitcast float %5 to i32 %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %43, 10 %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %27, 11 %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %32, 12 %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %37, 13 %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14 %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49 } ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1].x, PSIZE DCL OUT[2], GENERIC[0] DCL OUT[3].x, GENERIC[1] DCL CONST[0..21] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[16], IN[0].xxxx 1: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0] 3: ADD OUT[0], TEMP[0], CONST[19] 4: MOV OUT[1].x, IMM[0].xxxx 5: MOV OUT[2], IN[1] 6: MUL TEMP[0], CONST[12], IN[0].xxxx 7: MAD TEMP[0], CONST[13], IN[0].yyyy, TEMP[0] 8: MAD TEMP[0], CONST[14], IN[0].zzzz, TEMP[0] 9: ADD TEMP[0].z, TEMP[0], CONST[15] 10: ADD TEMP[0].x, TEMP[0].zzzz, -CONST[20].xxxx 11: MUL OUT[3].x, TEMP[0].xxxx, CONST[21].xxxx 12: END radeonsi: Compiling shader 24 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) #0 { main_body: %16 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %17 = load <4 x i32>, <4 x i32> addrspace(2)* %16, align 16, !invariant.load !0 %18 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %17, i32 %14, i32 0, i1 false, i1 false) #2 %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %23 = bitcast <16 x i8> addrspace(2)* %22 to <4 x i32> addrspace(2)* %24 = load <4 x i32>, <4 x i32> addrspace(2)* %23, align 16, !invariant.load !0 %25 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %24, i32 %15, i32 0, i1 false, i1 false) #2 %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !invariant.load !0 %32 = call nsz float @llvm.SI.load.const(<16 x i8> %31, i32 256) %33 = fmul nsz float %32, %19 %34 = call nsz float @llvm.SI.load.const(<16 x i8> %31, i32 260) %35 = fmul nsz float %34, %19 %36 = call nsz float @llvm.SI.load.const(<16 x i8> %31, i32 264) %37 = fmul nsz float %36, %19 %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !invariant.load !0 %40 = call nsz float @llvm.SI.load.const(<16 x i8> %39, i32 268) %41 = fmul nsz float %40, %19 %42 = call nsz float @llvm.SI.load.const(<16 x i8> %39, i32 272) %43 = fmul nsz float %42, %20 %44 = fadd nsz float %43, %33 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 276) %48 = fmul nsz float %47, %20 %49 = fadd nsz float %48, %35 %50 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 280) %51 = fmul nsz float %50, %20 %52 = fadd nsz float %51, %37 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 284) %56 = fmul nsz float %55, %20 %57 = fadd nsz float %56, %41 %58 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 288) %59 = fmul nsz float %58, %21 %60 = fadd nsz float %59, %44 %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !invariant.load !0 %63 = call nsz float @llvm.SI.load.const(<16 x i8> %62, i32 292) %64 = fmul nsz float %63, %21 %65 = fadd nsz float %64, %49 %66 = call nsz float @llvm.SI.load.const(<16 x i8> %62, i32 296) %67 = fmul nsz float %66, %21 %68 = fadd nsz float %67, %52 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !invariant.load !0 %71 = call nsz float @llvm.SI.load.const(<16 x i8> %70, i32 300) %72 = fmul nsz float %71, %21 %73 = fadd nsz float %72, %57 %74 = call nsz float @llvm.SI.load.const(<16 x i8> %70, i32 304) %75 = fadd nsz float %60, %74 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !invariant.load !0 %78 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 308) %79 = fadd nsz float %65, %78 %80 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 312) %81 = fadd nsz float %68, %80 %82 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 316) %83 = fadd nsz float %73, %82 %84 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !invariant.load !0 %86 = call nsz float @llvm.SI.load.const(<16 x i8> %85, i32 200) %87 = fmul nsz float %86, %19 %88 = call nsz float @llvm.SI.load.const(<16 x i8> %85, i32 216) %89 = fmul nsz float %88, %20 %90 = fadd nsz float %89, %87 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !invariant.load !0 %93 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 232) %94 = fmul nsz float %93, %21 %95 = fadd nsz float %94, %90 %96 = call nsz float @llvm.SI.load.const(<16 x i8> %92, i32 248) %97 = fadd nsz float %95, %96 %98 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !invariant.load !0 %100 = call nsz float @llvm.SI.load.const(<16 x i8> %99, i32 320) %101 = fsub nsz float %97, %100 %102 = call nsz float @llvm.SI.load.const(<16 x i8> %99, i32 336) %103 = fmul nsz float %101, %102 %104 = bitcast i32 %12 to float %105 = insertvalue <{ float, float, float }> undef, float %104, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %26, float %27, float %28, float %29) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %103, float undef, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %75, float %79, float %81, float %83) call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 13, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret <{ float, float, float }> %105 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].x, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0], LOCAL 0: MOV_SAT TEMP[0].x, IN[1].xxxx 1: LRP TEMP[0].xyz, TEMP[0].xxxx, CONST[0].xyzz, IN[0].xyzz 2: MOV TEMP[0].w, IN[0].wwww 3: MOV OUT[0], TEMP[0] 4: END radeonsi: Compiling shader 25 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = bitcast <2 x i32> %8 to <2 x float> %24 = extractelement <2 x float> %23, i32 0 %25 = extractelement <2 x float> %23, i32 1 %26 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 0, i32 1, i32 %6) #1 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %25, i32 0, i32 1, i32 %6) #1 %28 = call nsz float @llvm.AMDGPU.clamp.(float %27, float 0.000000e+00, float 1.000000e+00) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !invariant.load !0 %31 = call nsz float @llvm.SI.load.const(<16 x i8> %30, i32 0) %32 = bitcast <2 x i32> %8 to <2 x float> %33 = extractelement <2 x float> %32, i32 0 %34 = extractelement <2 x float> %32, i32 1 %35 = call nsz float @llvm.amdgcn.interp.p1(float %33, i32 0, i32 0, i32 %6) #1 %36 = call nsz float @llvm.amdgcn.interp.p2(float %35, float %34, i32 0, i32 0, i32 %6) #1 %37 = fsub nsz float 1.000000e+00, %28 %38 = fmul nsz float %31, %28 %39 = fmul nsz float %36, %37 %40 = fadd nsz float %38, %39 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !invariant.load !0 %43 = call nsz float @llvm.SI.load.const(<16 x i8> %42, i32 4) %44 = bitcast <2 x i32> %8 to <2 x float> %45 = extractelement <2 x float> %44, i32 0 %46 = extractelement <2 x float> %44, i32 1 %47 = call nsz float @llvm.amdgcn.interp.p1(float %45, i32 1, i32 0, i32 %6) #1 %48 = call nsz float @llvm.amdgcn.interp.p2(float %47, float %46, i32 1, i32 0, i32 %6) #1 %49 = fsub nsz float 1.000000e+00, %28 %50 = fmul nsz float %43, %28 %51 = fmul nsz float %48, %49 %52 = fadd nsz float %50, %51 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 8) %56 = bitcast <2 x i32> %8 to <2 x float> %57 = extractelement <2 x float> %56, i32 0 %58 = extractelement <2 x float> %56, i32 1 %59 = call nsz float @llvm.amdgcn.interp.p1(float %57, i32 2, i32 0, i32 %6) #1 %60 = call nsz float @llvm.amdgcn.interp.p2(float %59, float %58, i32 2, i32 0, i32 %6) #1 %61 = fsub nsz float 1.000000e+00, %28 %62 = fmul nsz float %55, %28 %63 = fmul nsz float %60, %61 %64 = fadd nsz float %62, %63 %65 = bitcast <2 x i32> %8 to <2 x float> %66 = extractelement <2 x float> %65, i32 0 %67 = extractelement <2 x float> %65, i32 1 %68 = call nsz float @llvm.amdgcn.interp.p1(float %66, i32 3, i32 0, i32 %6) #1 %69 = call nsz float @llvm.amdgcn.interp.p2(float %68, float %67, i32 3, i32 0, i32 %6) #1 %70 = bitcast float %5 to i32 %71 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %70, 10 %72 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %71, float %40, 11 %73 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %72, float %52, 12 %74 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %73, float %64, 13 %75 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %74, float %69, 14 %76 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %75, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %76 } ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: FSGE TEMP[0].x, CONST[0].xxxx, IN[0].wwww 1: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 2: KILL_IF -TEMP[0].xxxx 3: MOV OUT[0], IN[0] 4: END radeonsi: Compiling shader 26 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0 %25 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = bitcast <2 x i32> %8 to <2 x float> %27 = extractelement <2 x float> %26, i32 0 %28 = extractelement <2 x float> %26, i32 1 %29 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 3, i32 0, i32 %6) #1 %30 = call nsz float @llvm.amdgcn.interp.p2(float %29, float %28, i32 3, i32 0, i32 %6) #1 %31 = fcmp nsz oge float %25, %30 %32 = select i1 %31, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %32) %33 = bitcast <2 x i32> %8 to <2 x float> %34 = extractelement <2 x float> %33, i32 0 %35 = extractelement <2 x float> %33, i32 1 %36 = call nsz float @llvm.amdgcn.interp.p1(float %34, i32 0, i32 0, i32 %6) #1 %37 = call nsz float @llvm.amdgcn.interp.p2(float %36, float %35, i32 0, i32 0, i32 %6) #1 %38 = bitcast <2 x i32> %8 to <2 x float> %39 = extractelement <2 x float> %38, i32 0 %40 = extractelement <2 x float> %38, i32 1 %41 = call nsz float @llvm.amdgcn.interp.p1(float %39, i32 1, i32 0, i32 %6) #1 %42 = call nsz float @llvm.amdgcn.interp.p2(float %41, float %40, i32 1, i32 0, i32 %6) #1 %43 = bitcast <2 x i32> %8 to <2 x float> %44 = extractelement <2 x float> %43, i32 0 %45 = extractelement <2 x float> %43, i32 1 %46 = call nsz float @llvm.amdgcn.interp.p1(float %44, i32 2, i32 0, i32 %6) #1 %47 = call nsz float @llvm.amdgcn.interp.p2(float %46, float %45, i32 2, i32 0, i32 %6) #1 %48 = bitcast <2 x i32> %8 to <2 x float> %49 = extractelement <2 x float> %48, i32 0 %50 = extractelement <2 x float> %48, i32 1 %51 = call nsz float @llvm.amdgcn.interp.p1(float %49, i32 3, i32 0, i32 %6) #1 %52 = call nsz float @llvm.amdgcn.interp.p2(float %51, float %50, i32 3, i32 0, i32 %6) #1 %53 = bitcast float %5 to i32 %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %53, 10 %55 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54, float %37, 11 %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %55, float %42, 12 %57 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %47, 13 %58 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %57, float %52, 14 %59 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %58, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %59 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind declare void @llvm.AMDGPU.kill(float) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].x, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0..1] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: FSGE TEMP[0].x, CONST[1].xxxx, IN[0].wwww 1: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 2: KILL_IF -TEMP[0].xxxx 3: MOV_SAT TEMP[0].x, IN[1].xxxx 4: LRP TEMP[0].xyz, TEMP[0].xxxx, CONST[0].xyzz, IN[0].xyzz 5: MOV TEMP[0].w, IN[0].wwww 6: MOV OUT[0], TEMP[0] 7: END radeonsi: Compiling shader 27 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0 %25 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 16) %26 = bitcast <2 x i32> %8 to <2 x float> %27 = extractelement <2 x float> %26, i32 0 %28 = extractelement <2 x float> %26, i32 1 %29 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 3, i32 0, i32 %6) #1 %30 = call nsz float @llvm.amdgcn.interp.p2(float %29, float %28, i32 3, i32 0, i32 %6) #1 %31 = fcmp nsz oge float %25, %30 %32 = select i1 %31, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %32) %33 = bitcast <2 x i32> %8 to <2 x float> %34 = extractelement <2 x float> %33, i32 0 %35 = extractelement <2 x float> %33, i32 1 %36 = call nsz float @llvm.amdgcn.interp.p1(float %34, i32 0, i32 1, i32 %6) #1 %37 = call nsz float @llvm.amdgcn.interp.p2(float %36, float %35, i32 0, i32 1, i32 %6) #1 %38 = call nsz float @llvm.AMDGPU.clamp.(float %37, float 0.000000e+00, float 1.000000e+00) %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !invariant.load !0 %41 = call nsz float @llvm.SI.load.const(<16 x i8> %40, i32 0) %42 = bitcast <2 x i32> %8 to <2 x float> %43 = extractelement <2 x float> %42, i32 0 %44 = extractelement <2 x float> %42, i32 1 %45 = call nsz float @llvm.amdgcn.interp.p1(float %43, i32 0, i32 0, i32 %6) #1 %46 = call nsz float @llvm.amdgcn.interp.p2(float %45, float %44, i32 0, i32 0, i32 %6) #1 %47 = fsub nsz float 1.000000e+00, %38 %48 = fmul nsz float %41, %38 %49 = fmul nsz float %46, %47 %50 = fadd nsz float %48, %49 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !invariant.load !0 %53 = call nsz float @llvm.SI.load.const(<16 x i8> %52, i32 4) %54 = bitcast <2 x i32> %8 to <2 x float> %55 = extractelement <2 x float> %54, i32 0 %56 = extractelement <2 x float> %54, i32 1 %57 = call nsz float @llvm.amdgcn.interp.p1(float %55, i32 1, i32 0, i32 %6) #1 %58 = call nsz float @llvm.amdgcn.interp.p2(float %57, float %56, i32 1, i32 0, i32 %6) #1 %59 = fsub nsz float 1.000000e+00, %38 %60 = fmul nsz float %53, %38 %61 = fmul nsz float %58, %59 %62 = fadd nsz float %60, %61 %63 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !invariant.load !0 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %64, i32 8) %66 = bitcast <2 x i32> %8 to <2 x float> %67 = extractelement <2 x float> %66, i32 0 %68 = extractelement <2 x float> %66, i32 1 %69 = call nsz float @llvm.amdgcn.interp.p1(float %67, i32 2, i32 0, i32 %6) #1 %70 = call nsz float @llvm.amdgcn.interp.p2(float %69, float %68, i32 2, i32 0, i32 %6) #1 %71 = fsub nsz float 1.000000e+00, %38 %72 = fmul nsz float %65, %38 %73 = fmul nsz float %70, %71 %74 = fadd nsz float %72, %73 %75 = bitcast <2 x i32> %8 to <2 x float> %76 = extractelement <2 x float> %75, i32 0 %77 = extractelement <2 x float> %75, i32 1 %78 = call nsz float @llvm.amdgcn.interp.p1(float %76, i32 3, i32 0, i32 %6) #1 %79 = call nsz float @llvm.amdgcn.interp.p2(float %78, float %77, i32 3, i32 0, i32 %6) #1 %80 = bitcast float %5 to i32 %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %80, 10 %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %50, 11 %83 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82, float %62, 12 %84 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %83, float %74, 13 %85 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %84, float %79, 14 %86 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %85, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %86 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind declare void @llvm.AMDGPU.kill(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } !0 = !{} SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} part.vs.epilog.export_prim_id = 0 as_es = 0 as_ls = 0 mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} opt.hw_vs.kill_outputs = 0x0 opt.hw_vs.kill_outputs2 = 0x0 opt.hw_vs.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 3208000C v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[4:7], s[10:11], 0x10 ; C00A0105 00000010 s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C00A0005 00000000 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 s_nop 0 ; BF800000 buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 s_waitcnt vmcnt(1) ; BF8C0F71 exp param0 v10, v11, v12, v13 ; C400020F 0D0C0B0A s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v6, v7, v8, v9 done ; C40008CF 09080706 s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b32 m0, s11 ; BEFC000B v_interp_mov_f32 v0, p0, attr0.x ; D4020002 v_interp_mov_f32 v1, p0, attr0.y ; D4060102 v_interp_mov_f32 v2, p0, attr0.z ; D40A0202 v_interp_mov_f32 v3, p0, attr0.w ; D40E0302 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 exp mrt0 v0, v1, v0, v0 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 48 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** Texture #1 1,1 Texture #2 1,1 finished(2)!! VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2].xy, GENERIC[1] DCL CONST[0..19] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[16], IN[0].xxxx 1: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0] 3: ADD OUT[0], TEMP[0], CONST[19] 4: MOV OUT[1], IN[1] 5: MOV OUT[2].xy, IN[2].xyxx 6: END radeonsi: Compiling shader 28 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %17 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)* %18 = load <4 x i32>, <4 x i32> addrspace(2)* %17, align 16, !invariant.load !0 %19 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %18, i32 %14, i32 0, i1 false, i1 false) #2 %20 = extractelement <4 x float> %19, i32 0 %21 = extractelement <4 x float> %19, i32 1 %22 = extractelement <4 x float> %19, i32 2 %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %24 = bitcast <16 x i8> addrspace(2)* %23 to <4 x i32> addrspace(2)* %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !invariant.load !0 %26 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %25, i32 %15, i32 0, i1 false, i1 false) #2 %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = extractelement <4 x float> %26, i32 2 %30 = extractelement <4 x float> %26, i32 3 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %32 = bitcast <16 x i8> addrspace(2)* %31 to <4 x i32> addrspace(2)* %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0 %34 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %33, i32 %16, i32 0, i1 false, i1 false) #2 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !invariant.load !0 %39 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 256) %40 = fmul nsz float %39, %20 %41 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 260) %42 = fmul nsz float %41, %20 %43 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 264) %44 = fmul nsz float %43, %20 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 268) %48 = fmul nsz float %47, %20 %49 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 272) %50 = fmul nsz float %49, %21 %51 = fadd nsz float %50, %40 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !invariant.load !0 %54 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 276) %55 = fmul nsz float %54, %21 %56 = fadd nsz float %55, %42 %57 = call nsz float @llvm.SI.load.const(<16 x i8> %53, i32 280) %58 = fmul nsz float %57, %21 %59 = fadd nsz float %58, %44 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !invariant.load !0 %62 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 284) %63 = fmul nsz float %62, %21 %64 = fadd nsz float %63, %48 %65 = call nsz float @llvm.SI.load.const(<16 x i8> %61, i32 288) %66 = fmul nsz float %65, %22 %67 = fadd nsz float %66, %51 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !invariant.load !0 %70 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 292) %71 = fmul nsz float %70, %22 %72 = fadd nsz float %71, %56 %73 = call nsz float @llvm.SI.load.const(<16 x i8> %69, i32 296) %74 = fmul nsz float %73, %22 %75 = fadd nsz float %74, %59 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !invariant.load !0 %78 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 300) %79 = fmul nsz float %78, %22 %80 = fadd nsz float %79, %64 %81 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 304) %82 = fadd nsz float %67, %81 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !invariant.load !0 %85 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 308) %86 = fadd nsz float %72, %85 %87 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 312) %88 = fadd nsz float %75, %87 %89 = call nsz float @llvm.SI.load.const(<16 x i8> %84, i32 316) %90 = fadd nsz float %80, %89 %91 = bitcast i32 %12 to float %92 = insertvalue <{ float, float, float }> undef, float %91, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float %28, float %29, float %30) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %35, float %36, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %86, float %88, float %90) ret <{ float, float, float }> %92 } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].xy, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: ADD TEMP[1].x, TEMP[0].xxxx, CONST[1].xxxx 3: MIN TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 4: ADD TEMP[2].x, TEMP[0].yyyy, CONST[1].xxxx 5: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 6: MOV TEMP[1].y, TEMP[2].xxxx 7: ADD TEMP[2].x, TEMP[0].zzzz, CONST[1].xxxx 8: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 9: MOV TEMP[1].z, TEMP[2].xxxx 10: MOV TEMP[1].w, TEMP[0].wwww 11: MUL OUT[0], IN[0], TEMP[1] 12: END radeonsi: Compiling shader 29 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = bitcast <2 x i32> %8 to <2 x float> %24 = extractelement <2 x float> %23, i32 0 %25 = extractelement <2 x float> %23, i32 1 %26 = call nsz float @llvm.amdgcn.interp.p1(float %24, i32 0, i32 1, i32 %6) #1 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %25, i32 0, i32 1, i32 %6) #1 %28 = bitcast <2 x i32> %8 to <2 x float> %29 = extractelement <2 x float> %28, i32 0 %30 = extractelement <2 x float> %28, i32 1 %31 = call nsz float @llvm.amdgcn.interp.p1(float %29, i32 1, i32 1, i32 %6) #1 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %30, i32 1, i32 1, i32 %6) #1 %33 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %34 = load <8 x i32>, <8 x i32> addrspace(2)* %33, align 32, !invariant.load !0 %35 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %36 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %35, i64 0, i64 3, !amdgpu.uniform !0 %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !invariant.load !0 %38 = bitcast float %27 to i32 %39 = bitcast float %32 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = bitcast <2 x i32> %41 to <2 x float> %43 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %42, <8 x i32> %34, <4 x i32> %37, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !invariant.load !0 %50 = call nsz float @llvm.SI.load.const(<16 x i8> %49, i32 16) %51 = fadd nsz float %44, %50 %52 = call nsz float @llvm.minnum.f32(float %51, float 1.000000e+00) #1 %53 = call nsz float @llvm.SI.load.const(<16 x i8> %49, i32 16) %54 = fadd nsz float %45, %53 %55 = call nsz float @llvm.minnum.f32(float %54, float 1.000000e+00) #1 %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !invariant.load !0 %58 = call nsz float @llvm.SI.load.const(<16 x i8> %57, i32 16) %59 = fadd nsz float %46, %58 %60 = call nsz float @llvm.minnum.f32(float %59, float 1.000000e+00) #1 %61 = bitcast <2 x i32> %8 to <2 x float> %62 = extractelement <2 x float> %61, i32 0 %63 = extractelement <2 x float> %61, i32 1 %64 = call nsz float @llvm.amdgcn.interp.p1(float %62, i32 0, i32 0, i32 %6) #1 %65 = call nsz float @llvm.amdgcn.interp.p2(float %64, float %63, i32 0, i32 0, i32 %6) #1 %66 = fmul nsz float %65, %52 %67 = bitcast <2 x i32> %8 to <2 x float> %68 = extractelement <2 x float> %67, i32 0 %69 = extractelement <2 x float> %67, i32 1 %70 = call nsz float @llvm.amdgcn.interp.p1(float %68, i32 1, i32 0, i32 %6) #1 %71 = call nsz float @llvm.amdgcn.interp.p2(float %70, float %69, i32 1, i32 0, i32 %6) #1 %72 = fmul nsz float %71, %55 %73 = bitcast <2 x i32> %8 to <2 x float> %74 = extractelement <2 x float> %73, i32 0 %75 = extractelement <2 x float> %73, i32 1 %76 = call nsz float @llvm.amdgcn.interp.p1(float %74, i32 2, i32 0, i32 %6) #1 %77 = call nsz float @llvm.amdgcn.interp.p2(float %76, float %75, i32 2, i32 0, i32 %6) #1 %78 = fmul nsz float %77, %60 %79 = bitcast <2 x i32> %8 to <2 x float> %80 = extractelement <2 x float> %79, i32 0 %81 = extractelement <2 x float> %79, i32 1 %82 = call nsz float @llvm.amdgcn.interp.p1(float %80, i32 3, i32 0, i32 %6) #1 %83 = call nsz float @llvm.amdgcn.interp.p2(float %82, float %81, i32 3, i32 0, i32 %6) #1 %84 = fmul nsz float %83, %47 %85 = bitcast float %5 to i32 %86 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %85, 10 %87 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %86, float %66, 11 %88 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %87, float %72, 12 %89 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %88, float %78, 13 %90 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %89, float %84, 14 %91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %90, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91 } ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].xy, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1..9] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 6.2830, 0.5000, 1.0000, 0.0400} 0: ADD TEMP[0].x, CONST[8].zzzz, -CONST[8].xxxx 1: ADD TEMP[1].x, CONST[8].wwww, -CONST[8].yyyy 2: MOV TEMP[0].y, TEMP[1].xxxx 3: RCP TEMP[1].x, CONST[7].xxxx 4: RCP TEMP[1].y, CONST[7].yyyy 5: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy 6: ADD TEMP[1].xy, IN[1].xyyy, -CONST[8].xyyy 7: RCP TEMP[2].x, TEMP[0].xxxx 8: RCP TEMP[2].y, TEMP[0].yyyy 9: MAD TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, CONST[6].xyyy 10: RCP TEMP[2].x, CONST[1].xxxx 11: RCP TEMP[2].y, CONST[1].yyyy 12: MUL TEMP[2].xy, IMM[0].xxxx, TEMP[2].xyyy 13: MAD TEMP[3].xy, CONST[5].xxxx, CONST[3].xyyy, TEMP[1].xyyy 14: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xyyy 15: SIN TEMP[3].x, TEMP[2].xxxx 16: SIN TEMP[3].y, TEMP[2].yyyy 17: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy 18: SQRT TEMP[1].x, TEMP[1].xxxx 19: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 20: RCP TEMP[2].x, CONST[2].xxxx 21: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 22: SIN TEMP[1].x, TEMP[1].xxxx 23: MUL TEMP[1].xy, CONST[4].xyyy, TEMP[1].xxxx 24: MUL TEMP[1].xy, TEMP[3].xyyy, TEMP[1].xyyy 25: MUL TEMP[1].xy, TEMP[1].xyyy, TEMP[0].xyyy 26: ADD TEMP[0].xy, IN[1].xyyy, TEMP[1].xyyy 27: MOV TEMP[0].xy, TEMP[0].xyyy 28: TEX TEMP[0], TEMP[0], SAMP[0], 2D 29: MUL TEMP[0], IN[0], TEMP[0] 30: MOV TEMP[1].w, TEMP[0].wwww 31: ADD TEMP[2].x, TEMP[3].xxxx, TEMP[3].yyyy 32: MAD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy, IMM[0].zzzz 33: MUL TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 34: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 35: MAD TEMP[1].xyz, TEMP[2].xxxx, IMM[0].wwww, TEMP[0].xyzz 36: LRP TEMP[1].xyz, IMM[0].yyyy, CONST[9].xyzz, TEMP[1].xyzz 37: MOV OUT[0], TEMP[1] 38: END radeonsi: Compiling shader 30 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0 %25 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 136) %26 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 128) %27 = fsub nsz float %25, %26 %28 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 140) %29 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 132) %30 = fsub nsz float %28, %29 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !invariant.load !0 %33 = call nsz float @llvm.SI.load.const(<16 x i8> %32, i32 112) %34 = fdiv nsz float 1.000000e+00, %33, !fpmath !1 %35 = call nsz float @llvm.SI.load.const(<16 x i8> %32, i32 116) %36 = fdiv nsz float 1.000000e+00, %35, !fpmath !1 %37 = fmul nsz float %27, %34 %38 = fmul nsz float %30, %36 %39 = bitcast <2 x i32> %8 to <2 x float> %40 = extractelement <2 x float> %39, i32 0 %41 = extractelement <2 x float> %39, i32 1 %42 = call nsz float @llvm.amdgcn.interp.p1(float %40, i32 0, i32 1, i32 %6) #1 %43 = call nsz float @llvm.amdgcn.interp.p2(float %42, float %41, i32 0, i32 1, i32 %6) #1 %44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !invariant.load !0 %46 = call nsz float @llvm.SI.load.const(<16 x i8> %45, i32 128) %47 = fsub nsz float %43, %46 %48 = bitcast <2 x i32> %8 to <2 x float> %49 = extractelement <2 x float> %48, i32 0 %50 = extractelement <2 x float> %48, i32 1 %51 = call nsz float @llvm.amdgcn.interp.p1(float %49, i32 1, i32 1, i32 %6) #1 %52 = call nsz float @llvm.amdgcn.interp.p2(float %51, float %50, i32 1, i32 1, i32 %6) #1 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 132) %56 = fsub nsz float %52, %55 %57 = fdiv nsz float 1.000000e+00, %37, !fpmath !1 %58 = fdiv nsz float 1.000000e+00, %38, !fpmath !1 %59 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 96) %60 = fmul nsz float %47, %57 %61 = fadd nsz float %60, %59 %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !invariant.load !0 %64 = call nsz float @llvm.SI.load.const(<16 x i8> %63, i32 100) %65 = fmul nsz float %56, %58 %66 = fadd nsz float %65, %64 %67 = call nsz float @llvm.SI.load.const(<16 x i8> %63, i32 16) %68 = fdiv nsz float 1.000000e+00, %67, !fpmath !1 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !invariant.load !0 %71 = call nsz float @llvm.SI.load.const(<16 x i8> %70, i32 20) %72 = fdiv nsz float 1.000000e+00, %71, !fpmath !1 %73 = fmul nsz float %68, 0x401921CAC0000000 %74 = fmul nsz float %72, 0x401921CAC0000000 %75 = call nsz float @llvm.SI.load.const(<16 x i8> %70, i32 80) %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !invariant.load !0 %78 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 48) %79 = fmul nsz float %75, %78 %80 = fadd nsz float %79, %61 %81 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 80) %82 = call nsz float @llvm.SI.load.const(<16 x i8> %77, i32 52) %83 = fmul nsz float %81, %82 %84 = fadd nsz float %83, %66 %85 = fmul nsz float %73, %80 %86 = fmul nsz float %74, %84 %87 = call nsz float @llvm.sin.f32(float %85) #1 %88 = call nsz float @llvm.sin.f32(float %86) #1 %89 = fmul nsz float %61, %61 %90 = fmul nsz float %66, %66 %91 = fadd nsz float %89, %90 %92 = call nsz float @llvm.sqrt.f32(float %91) #1 %93 = fmul nsz float %92, 0x401921CAC0000000 %94 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !invariant.load !0 %96 = call nsz float @llvm.SI.load.const(<16 x i8> %95, i32 32) %97 = fdiv nsz float 1.000000e+00, %96, !fpmath !1 %98 = fmul nsz float %93, %97 %99 = call nsz float @llvm.sin.f32(float %98) #1 %100 = call nsz float @llvm.SI.load.const(<16 x i8> %95, i32 64) %101 = fmul nsz float %100, %99 %102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !invariant.load !0 %104 = call nsz float @llvm.SI.load.const(<16 x i8> %103, i32 68) %105 = fmul nsz float %104, %99 %106 = fmul nsz float %87, %101 %107 = fmul nsz float %88, %105 %108 = fmul nsz float %106, %37 %109 = fmul nsz float %107, %38 %110 = bitcast <2 x i32> %8 to <2 x float> %111 = extractelement <2 x float> %110, i32 0 %112 = extractelement <2 x float> %110, i32 1 %113 = call nsz float @llvm.amdgcn.interp.p1(float %111, i32 0, i32 1, i32 %6) #1 %114 = call nsz float @llvm.amdgcn.interp.p2(float %113, float %112, i32 0, i32 1, i32 %6) #1 %115 = fadd nsz float %114, %108 %116 = bitcast <2 x i32> %8 to <2 x float> %117 = extractelement <2 x float> %116, i32 0 %118 = extractelement <2 x float> %116, i32 1 %119 = call nsz float @llvm.amdgcn.interp.p1(float %117, i32 1, i32 1, i32 %6) #1 %120 = call nsz float @llvm.amdgcn.interp.p2(float %119, float %118, i32 1, i32 1, i32 %6) #1 %121 = fadd nsz float %120, %109 %122 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %123 = load <8 x i32>, <8 x i32> addrspace(2)* %122, align 32, !invariant.load !0 %124 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %125 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %124, i64 0, i64 3, !amdgpu.uniform !0 %126 = load <4 x i32>, <4 x i32> addrspace(2)* %125, align 16, !invariant.load !0 %127 = bitcast float %115 to i32 %128 = bitcast float %121 to i32 %129 = insertelement <2 x i32> undef, i32 %127, i32 0 %130 = insertelement <2 x i32> %129, i32 %128, i32 1 %131 = bitcast <2 x i32> %130 to <2 x float> %132 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %131, <8 x i32> %123, <4 x i32> %126, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %133 = extractelement <4 x float> %132, i32 0 %134 = extractelement <4 x float> %132, i32 1 %135 = extractelement <4 x float> %132, i32 2 %136 = extractelement <4 x float> %132, i32 3 %137 = bitcast <2 x i32> %8 to <2 x float> %138 = extractelement <2 x float> %137, i32 0 %139 = extractelement <2 x float> %137, i32 1 %140 = call nsz float @llvm.amdgcn.interp.p1(float %138, i32 0, i32 0, i32 %6) #1 %141 = call nsz float @llvm.amdgcn.interp.p2(float %140, float %139, i32 0, i32 0, i32 %6) #1 %142 = fmul nsz float %141, %133 %143 = bitcast <2 x i32> %8 to <2 x float> %144 = extractelement <2 x float> %143, i32 0 %145 = extractelement <2 x float> %143, i32 1 %146 = call nsz float @llvm.amdgcn.interp.p1(float %144, i32 1, i32 0, i32 %6) #1 %147 = call nsz float @llvm.amdgcn.interp.p2(float %146, float %145, i32 1, i32 0, i32 %6) #1 %148 = fmul nsz float %147, %134 %149 = bitcast <2 x i32> %8 to <2 x float> %150 = extractelement <2 x float> %149, i32 0 %151 = extractelement <2 x float> %149, i32 1 %152 = call nsz float @llvm.amdgcn.interp.p1(float %150, i32 2, i32 0, i32 %6) #1 %153 = call nsz float @llvm.amdgcn.interp.p2(float %152, float %151, i32 2, i32 0, i32 %6) #1 %154 = fmul nsz float %153, %135 %155 = bitcast <2 x i32> %8 to <2 x float> %156 = extractelement <2 x float> %155, i32 0 %157 = extractelement <2 x float> %155, i32 1 %158 = call nsz float @llvm.amdgcn.interp.p1(float %156, i32 3, i32 0, i32 %6) #1 %159 = call nsz float @llvm.amdgcn.interp.p2(float %158, float %157, i32 3, i32 0, i32 %6) #1 %160 = fmul nsz float %159, %136 %161 = fadd nsz float %87, %88 %162 = fmul nsz float %161, 5.000000e-01 %163 = fadd nsz float %162, 1.000000e+00 %164 = fmul nsz float %163, 5.000000e-01 %165 = fmul nsz float %164, %164 %166 = fmul nsz float %165, 0x3FA47AE140000000 %167 = fadd nsz float %166, %142 %168 = fmul nsz float %165, 0x3FA47AE140000000 %169 = fadd nsz float %168, %148 %170 = fmul nsz float %165, 0x3FA47AE140000000 %171 = fadd nsz float %170, %154 %172 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %173 = load <16 x i8>, <16 x i8> addrspace(2)* %172, align 16, !invariant.load !0 %174 = call nsz float @llvm.SI.load.const(<16 x i8> %173, i32 144) %175 = fmul nsz float %174, 5.000000e-01 %176 = fmul nsz float %167, 5.000000e-01 %177 = fadd nsz float %175, %176 %178 = call nsz float @llvm.SI.load.const(<16 x i8> %173, i32 148) %179 = fmul nsz float %178, 5.000000e-01 %180 = fmul nsz float %169, 5.000000e-01 %181 = fadd nsz float %179, %180 %182 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %183 = load <16 x i8>, <16 x i8> addrspace(2)* %182, align 16, !invariant.load !0 %184 = call nsz float @llvm.SI.load.const(<16 x i8> %183, i32 152) %185 = fmul nsz float %184, 5.000000e-01 %186 = fmul nsz float %171, 5.000000e-01 %187 = fadd nsz float %185, %186 %188 = bitcast float %5 to i32 %189 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %188, 10 %190 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %189, float %177, 11 %191 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %190, float %181, 12 %192 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %191, float %187, 13 %193 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %192, float %160, 14 %194 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %193, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %194 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } !0 = !{} !1 = !{float 2.500000e+00} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].xy, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1..9] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.0000, 6.2830, 1.0000, 0.5000} IMM[1] FLT32 { 0.4000, 0.9900, 0.2500, 1.0472} IMM[2] FLT32 { 0.7500, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, CONST[4].zzzz, -CONST[4].xxxx 1: ADD TEMP[1].x, CONST[4].wwww, -CONST[4].yyyy 2: RCP TEMP[2].x, CONST[3].xxxx 3: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 4: RCP TEMP[2].x, CONST[3].yyyy 5: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 6: MOV TEMP[3].y, IMM[0].xxxx 7: RCP TEMP[4].x, CONST[7].xxxx 8: MUL TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 9: RCP TEMP[5].x, TEMP[2].xxxx 10: MUL TEMP[5].x, IN[1].yyyy, TEMP[5].xxxx 11: MAD TEMP[5].x, CONST[2].xxxx, CONST[8].xxxx, TEMP[5].xxxx 12: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 13: SIN TEMP[4].x, TEMP[4].xxxx 14: MUL TEMP[0].x, CONST[9].xxxx, TEMP[0].xxxx 15: MUL TEMP[3].x, TEMP[4].xxxx, TEMP[0].xxxx 16: ADD TEMP[0].xy, IN[1].xyyy, TEMP[3].xyyy 17: MOV TEMP[0].xy, TEMP[0].xyyy 18: TEX TEMP[0], TEMP[0], SAMP[0], 2D 19: MUL TEMP[0], IN[0], TEMP[0] 20: MOV TEMP[3], TEMP[0] 21: FSNE TEMP[4].x, TEMP[0].wwww, IMM[0].xxxx 22: UIF TEMP[4].xxxx 23: ADD TEMP[4].x, IN[1].yyyy, -CONST[4].yyyy 24: RCP TEMP[1].x, TEMP[1].xxxx 25: MUL TEMP[1].x, TEMP[4].xxxx, TEMP[1].xxxx 26: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1].xxxx 27: ADD TEMP[1].x, CONST[2].xxxx, -TEMP[1].xxxx 28: RCP TEMP[4].x, CONST[1].xxxx 29: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 30: FRC TEMP[1].x, TEMP[1].xxxx 31: ADD TEMP[1].x, IMM[0].zzzz, -TEMP[1].xxxx 32: LRP TEMP[4].x, TEMP[1].xxxx, CONST[5].xxxx, CONST[6].xxxx 33: MOV TEMP[3].w, TEMP[4].xxxx 34: ADD TEMP[1].x, IMM[0].zzzz, -TEMP[1].xxxx 35: MUL TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 36: LRP TEMP[3].xyz, TEMP[1].xxxx, IMM[0].xwzz, TEMP[0].xyzz 37: MUL TEMP[0].x, CONST[5].xxxx, IMM[1].yyyy 38: FSLT TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 39: UCMP TEMP[3], TEMP[0].xxxx, IMM[0].xzzz, TEMP[3] 40: MUL TEMP[0].x, IMM[1].zzzz, IMM[0].wwww 41: MUL TEMP[1].x, IMM[1].wwww, IN[1].yyyy 42: RCP TEMP[2].x, TEMP[2].xxxx 43: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 44: SIN TEMP[1].x, TEMP[1].xxxx 45: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz 46: MAD TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx, IMM[2].xxxx 47: MUL TEMP[0].x, TEMP[3].wwww, TEMP[0].xxxx 48: MOV TEMP[3].w, TEMP[0].xxxx 49: MOV OUT[0], TEMP[3] 50: ENDIF 51: END radeonsi: Compiling shader 31 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0 %25 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 72) %26 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 64) %27 = fsub nsz float %25, %26 %28 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 76) %29 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 68) %30 = fsub nsz float %28, %29 %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !invariant.load !0 %33 = call nsz float @llvm.SI.load.const(<16 x i8> %32, i32 48) %34 = fdiv nsz float 1.000000e+00, %33, !fpmath !1 %35 = fmul nsz float %27, %34 %36 = call nsz float @llvm.SI.load.const(<16 x i8> %32, i32 52) %37 = fdiv nsz float 1.000000e+00, %36, !fpmath !1 %38 = fmul nsz float %30, %37 %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !invariant.load !0 %41 = call nsz float @llvm.SI.load.const(<16 x i8> %40, i32 112) %42 = fdiv nsz float 1.000000e+00, %41, !fpmath !1 %43 = fmul nsz float %42, 0x401921CAC0000000 %44 = fdiv nsz float 1.000000e+00, %38, !fpmath !1 %45 = bitcast <2 x i32> %8 to <2 x float> %46 = extractelement <2 x float> %45, i32 0 %47 = extractelement <2 x float> %45, i32 1 %48 = call nsz float @llvm.amdgcn.interp.p1(float %46, i32 1, i32 1, i32 %6) #1 %49 = call nsz float @llvm.amdgcn.interp.p2(float %48, float %47, i32 1, i32 1, i32 %6) #1 %50 = fmul nsz float %49, %44 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !invariant.load !0 %53 = call nsz float @llvm.SI.load.const(<16 x i8> %52, i32 32) %54 = call nsz float @llvm.SI.load.const(<16 x i8> %52, i32 128) %55 = fmul nsz float %53, %54 %56 = fadd nsz float %55, %50 %57 = fmul nsz float %43, %56 %58 = call nsz float @llvm.sin.f32(float %57) #1 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !invariant.load !0 %61 = call nsz float @llvm.SI.load.const(<16 x i8> %60, i32 144) %62 = fmul nsz float %61, %35 %63 = fmul nsz float %58, %62 %64 = bitcast <2 x i32> %8 to <2 x float> %65 = extractelement <2 x float> %64, i32 0 %66 = extractelement <2 x float> %64, i32 1 %67 = call nsz float @llvm.amdgcn.interp.p1(float %65, i32 0, i32 1, i32 %6) #1 %68 = call nsz float @llvm.amdgcn.interp.p2(float %67, float %66, i32 0, i32 1, i32 %6) #1 %69 = fadd nsz float %68, %63 %70 = bitcast <2 x i32> %8 to <2 x float> %71 = extractelement <2 x float> %70, i32 0 %72 = extractelement <2 x float> %70, i32 1 %73 = call nsz float @llvm.amdgcn.interp.p1(float %71, i32 1, i32 1, i32 %6) #1 %74 = call nsz float @llvm.amdgcn.interp.p2(float %73, float %72, i32 1, i32 1, i32 %6) #1 %75 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !invariant.load !0 %77 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %78 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %77, i64 0, i64 3, !amdgpu.uniform !0 %79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !invariant.load !0 %80 = bitcast float %69 to i32 %81 = bitcast float %74 to i32 %82 = insertelement <2 x i32> undef, i32 %80, i32 0 %83 = insertelement <2 x i32> %82, i32 %81, i32 1 %84 = bitcast <2 x i32> %83 to <2 x float> %85 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %84, <8 x i32> %76, <4 x i32> %79, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %86 = extractelement <4 x float> %85, i32 3 %87 = bitcast <2 x i32> %8 to <2 x float> %88 = bitcast <2 x i32> %8 to <2 x float> %89 = bitcast <2 x i32> %8 to <2 x float> %90 = bitcast <2 x i32> %8 to <2 x float> %91 = extractelement <2 x float> %90, i32 0 %92 = extractelement <2 x float> %90, i32 1 %93 = call nsz float @llvm.amdgcn.interp.p1(float %91, i32 3, i32 0, i32 %6) #1 %94 = call nsz float @llvm.amdgcn.interp.p2(float %93, float %92, i32 3, i32 0, i32 %6) #1 %95 = fmul nsz float %94, %86 %96 = fcmp nsz une float %95, 0.000000e+00 br i1 %96, label %if22, label %endif50 if22: ; preds = %main_body %97 = extractelement <2 x float> %89, i32 0 %98 = call nsz float @llvm.amdgcn.interp.p1(float %97, i32 2, i32 0, i32 %6) #1 %99 = extractelement <2 x float> %89, i32 1 %100 = call nsz float @llvm.amdgcn.interp.p2(float %98, float %99, i32 2, i32 0, i32 %6) #1 %101 = extractelement <4 x float> %85, i32 2 %102 = fmul nsz float %100, %101 %103 = extractelement <2 x float> %88, i32 0 %104 = call nsz float @llvm.amdgcn.interp.p1(float %103, i32 1, i32 0, i32 %6) #1 %105 = extractelement <2 x float> %88, i32 1 %106 = call nsz float @llvm.amdgcn.interp.p2(float %104, float %105, i32 1, i32 0, i32 %6) #1 %107 = extractelement <4 x float> %85, i32 1 %108 = fmul nsz float %106, %107 %109 = extractelement <2 x float> %87, i32 0 %110 = call nsz float @llvm.amdgcn.interp.p1(float %109, i32 0, i32 0, i32 %6) #1 %111 = extractelement <2 x float> %87, i32 1 %112 = call nsz float @llvm.amdgcn.interp.p2(float %110, float %111, i32 0, i32 0, i32 %6) #1 %113 = extractelement <4 x float> %85, i32 0 %114 = fmul nsz float %112, %113 %115 = bitcast <2 x i32> %8 to <2 x float> %116 = extractelement <2 x float> %115, i32 0 %117 = extractelement <2 x float> %115, i32 1 %118 = call nsz float @llvm.amdgcn.interp.p1(float %116, i32 1, i32 1, i32 %6) #1 %119 = call nsz float @llvm.amdgcn.interp.p2(float %118, float %117, i32 1, i32 1, i32 %6) #1 %120 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %121 = load <16 x i8>, <16 x i8> addrspace(2)* %120, align 16, !invariant.load !0 %122 = call nsz float @llvm.SI.load.const(<16 x i8> %121, i32 68) %123 = fsub nsz float %119, %122 %124 = fdiv nsz float 1.000000e+00, %30, !fpmath !1 %125 = fmul nsz float %123, %124 %126 = call nsz float @llvm.SI.load.const(<16 x i8> %121, i32 16) %127 = fmul nsz float %125, %126 %128 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %129 = load <16 x i8>, <16 x i8> addrspace(2)* %128, align 16, !invariant.load !0 %130 = call nsz float @llvm.SI.load.const(<16 x i8> %129, i32 32) %131 = fsub nsz float %130, %127 %132 = call nsz float @llvm.SI.load.const(<16 x i8> %129, i32 16) %133 = fdiv nsz float 1.000000e+00, %132, !fpmath !1 %134 = fmul nsz float %131, %133 %135 = call nsz float @llvm.floor.f32(float %134) #1 %136 = fsub nsz float %134, %135 %137 = fsub nsz float 1.000000e+00, %136 %138 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %139 = load <16 x i8>, <16 x i8> addrspace(2)* %138, align 16, !invariant.load !0 %140 = call nsz float @llvm.SI.load.const(<16 x i8> %139, i32 80) %141 = call nsz float @llvm.SI.load.const(<16 x i8> %139, i32 96) %142 = fsub nsz float 1.000000e+00, %137 %143 = fmul nsz float %140, %137 %144 = fmul nsz float %141, %142 %145 = fadd nsz float %143, %144 %146 = fsub nsz float 1.000000e+00, %137 %147 = fmul nsz float %146, 0x3FD99999A0000000 %148 = fsub nsz float 1.000000e+00, %147 %149 = fmul nsz float %114, %148 %150 = fsub nsz float 1.000000e+00, %147 %151 = fmul nsz float %147, 5.000000e-01 %152 = fmul nsz float %108, %150 %153 = fadd nsz float %151, %152 %154 = fsub nsz float 1.000000e+00, %147 %155 = fmul nsz float %102, %154 %156 = fadd nsz float %147, %155 %157 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %158 = load <16 x i8>, <16 x i8> addrspace(2)* %157, align 16, !invariant.load !0 %159 = call nsz float @llvm.SI.load.const(<16 x i8> %158, i32 80) %160 = fmul nsz float %159, 0x3FEFAE1480000000 %161 = fcmp nsz olt float %160, %145 %162 = select i1 %161, float 0.000000e+00, float %149 %163 = select i1 %161, float 1.000000e+00, float %153 %164 = select i1 %161, float 1.000000e+00, float %156 %165 = select i1 %161, float 1.000000e+00, float %145 %166 = bitcast <2 x i32> %8 to <2 x float> %167 = extractelement <2 x float> %166, i32 0 %168 = extractelement <2 x float> %166, i32 1 %169 = call nsz float @llvm.amdgcn.interp.p1(float %167, i32 1, i32 1, i32 %6) #1 %170 = call nsz float @llvm.amdgcn.interp.p2(float %169, float %168, i32 1, i32 1, i32 %6) #1 %171 = fmul nsz float %170, 0x3FF0C131E0000000 %172 = fdiv nsz float 1.000000e+00, %38, !fpmath !1 %173 = fmul nsz float %171, %172 %174 = call nsz float @llvm.sin.f32(float %173) #1 %175 = fadd nsz float %174, 1.000000e+00 %176 = fmul nsz float %175, 1.250000e-01 %177 = fadd nsz float %176, 7.500000e-01 %178 = fmul nsz float %165, %177 br label %endif50 endif50: ; preds = %if22, %main_body %.03 = phi float [ %178, %if22 ], [ undef, %main_body ] %.02 = phi float [ %164, %if22 ], [ undef, %main_body ] %.01 = phi float [ %163, %if22 ], [ undef, %main_body ] %.0 = phi float [ %162, %if22 ], [ undef, %main_body ] %179 = bitcast float %5 to i32 %180 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %179, 10 %181 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %180, float %.0, 11 %182 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %181, float %.01, 12 %183 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %182, float %.02, 13 %184 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %183, float %.03, 14 %185 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %184, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %185 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } !0 = !{} !1 = !{float 2.500000e+00} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].xy, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1..4] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 2.0000, 0.5000, 1.0000, -0.5000} IMM[1] FLT32 { 0.7071, 1.5708, -0.0236, 0.0813} IMM[2] FLT32 { -0.2146, 0.0000, 6.2831, 1.1000} IMM[3] FLT32 { 5.0000, 3.0000, 4.0000, 6.0000} IMM[4] FLT32 { 7.0000, 8.0000, 9.0000, 10.0000} IMM[5] FLT32 { 0.1000, 0.3300, 0.7000, 0.0000} 0: ADD TEMP[0].x, CONST[3].xxxx, CONST[3].yyyy 1: RCP TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].x, IMM[0].xxxx, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MUL TEMP[1], IN[0], TEMP[1] 6: MUL TEMP[2].x, CONST[1].xxxx, IMM[0].xxxx 7: MOV TEMP[2].x, |TEMP[2].xxxx| 8: POW TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 9: MIN TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz 10: ADD TEMP[3].xy, IN[1].xyyy, IMM[0].wwww 11: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy 12: SQRT TEMP[4].x, TEMP[4].xxxx 13: ADD TEMP[5].x, TEMP[4].xxxx, -CONST[2].xxxx 14: ADD TEMP[6].x, IMM[1].xxxx, -CONST[2].xxxx 15: RCP TEMP[6].x, TEMP[6].xxxx 16: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 17: FSLT TEMP[6].x, CONST[2].xxxx, TEMP[4].xxxx 18: UIF TEMP[6].xxxx 19: RCP TEMP[6].x, TEMP[4].xxxx 20: MUL TEMP[6].x, TEMP[3].xxxx, TEMP[6].xxxx 21: SSG TEMP[7].x, TEMP[6].xxxx 22: MOV TEMP[8].x, |TEMP[6].xxxx| 23: ADD TEMP[8].x, IMM[0].zzzz, -TEMP[8].xxxx 24: SQRT TEMP[8].x, TEMP[8].xxxx 25: MOV TEMP[9].x, |TEMP[6].xxxx| 26: MOV TEMP[10].x, |TEMP[6].xxxx| 27: MOV TEMP[6].x, |TEMP[6].xxxx| 28: MAD TEMP[6].x, TEMP[6].xxxx, IMM[1].zzzz, IMM[1].wwww 29: MAD TEMP[6].x, TEMP[10].xxxx, TEMP[6].xxxx, IMM[2].xxxx 30: MAD TEMP[6].x, TEMP[9].xxxx, TEMP[6].xxxx, IMM[1].yyyy 31: MUL TEMP[6].x, TEMP[8].xxxx, TEMP[6].xxxx 32: ADD TEMP[6].x, IMM[1].yyyy, -TEMP[6].xxxx 33: MUL TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx 34: ADD TEMP[6].x, IMM[1].yyyy, -TEMP[6].xxxx 35: SSG TEMP[7].x, TEMP[3].yyyy 36: FSLT TEMP[7].x, TEMP[7].xxxx, IMM[2].yyyy 37: ADD TEMP[8].x, IMM[2].zzzz, -TEMP[6].xxxx 38: UCMP TEMP[3].x, TEMP[7].xxxx, TEMP[8].xxxx, TEMP[6].xxxx 39: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].xxxx 40: ADD TEMP[6].x, IMM[2].wwww, -TEMP[2].xxxx 41: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[6].xxxx 42: MUL TEMP[0].x, TEMP[0].xxxx, IMM[3].xxxx 43: ADD TEMP[6].x, TEMP[4].xxxx, -TEMP[0].xxxx 44: COS TEMP[7].x, TEMP[3].xxxx 45: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 46: SIN TEMP[8].x, TEMP[3].xxxx 47: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 48: MOV TEMP[7].y, TEMP[6].xxxx 49: ADD TEMP[6].xy, TEMP[7].xyyy, IMM[0].yyyy 50: MOV TEMP[6].xy, TEMP[6].xyyy 51: TEX TEMP[6], TEMP[6], SAMP[0], 2D 52: MAD TEMP[1], IN[0], TEMP[6], TEMP[1] 53: MUL TEMP[6].x, IMM[0].xxxx, TEMP[0].xxxx 54: ADD TEMP[6].x, TEMP[4].xxxx, -TEMP[6].xxxx 55: COS TEMP[7].x, TEMP[3].xxxx 56: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 57: SIN TEMP[8].x, TEMP[3].xxxx 58: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 59: MOV TEMP[7].y, TEMP[6].xxxx 60: ADD TEMP[6].xy, TEMP[7].xyyy, IMM[0].yyyy 61: MOV TEMP[6].xy, TEMP[6].xyyy 62: TEX TEMP[6], TEMP[6], SAMP[0], 2D 63: MAD TEMP[1], IN[0], TEMP[6], TEMP[1] 64: MUL TEMP[6].x, IMM[3].yyyy, TEMP[0].xxxx 65: ADD TEMP[6].x, TEMP[4].xxxx, -TEMP[6].xxxx 66: COS TEMP[7].x, TEMP[3].xxxx 67: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 68: SIN TEMP[8].x, TEMP[3].xxxx 69: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 70: MOV TEMP[7].y, TEMP[6].xxxx 71: ADD TEMP[6].xy, TEMP[7].xyyy, IMM[0].yyyy 72: MOV TEMP[6].xy, TEMP[6].xyyy 73: TEX TEMP[6], TEMP[6], SAMP[0], 2D 74: MAD TEMP[1], IN[0], TEMP[6], TEMP[1] 75: MUL TEMP[6].x, IMM[3].zzzz, TEMP[0].xxxx 76: ADD TEMP[6].x, TEMP[4].xxxx, -TEMP[6].xxxx 77: COS TEMP[7].x, TEMP[3].xxxx 78: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 79: SIN TEMP[8].x, TEMP[3].xxxx 80: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 81: MOV TEMP[7].y, TEMP[6].xxxx 82: ADD TEMP[6].xy, TEMP[7].xyyy, IMM[0].yyyy 83: MOV TEMP[6].xy, TEMP[6].xyyy 84: TEX TEMP[6], TEMP[6], SAMP[0], 2D 85: MAD TEMP[1], IN[0], TEMP[6], TEMP[1] 86: MUL TEMP[6].x, IMM[3].xxxx, TEMP[0].xxxx 87: ADD TEMP[6].x, TEMP[4].xxxx, -TEMP[6].xxxx 88: COS TEMP[7].x, TEMP[3].xxxx 89: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 90: SIN TEMP[8].x, TEMP[3].xxxx 91: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 92: MOV TEMP[7].y, TEMP[6].xxxx 93: ADD TEMP[6].xy, TEMP[7].xyyy, IMM[0].yyyy 94: MOV TEMP[6].xy, TEMP[6].xyyy 95: TEX TEMP[6], TEMP[6], SAMP[0], 2D 96: MAD TEMP[1], IN[0], TEMP[6], TEMP[1] 97: MUL TEMP[6].x, IMM[3].wwww, TEMP[0].xxxx 98: ADD TEMP[6].x, TEMP[4].xxxx, -TEMP[6].xxxx 99: COS TEMP[7].x, TEMP[3].xxxx 100: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 101: SIN TEMP[8].x, TEMP[3].xxxx 102: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 103: MOV TEMP[7].y, TEMP[6].xxxx 104: ADD TEMP[6].xy, TEMP[7].xyyy, IMM[0].yyyy 105: MOV TEMP[6].xy, TEMP[6].xyyy 106: TEX TEMP[6], TEMP[6], SAMP[0], 2D 107: MAD TEMP[1], IN[0], TEMP[6], TEMP[1] 108: MUL TEMP[6].x, IMM[4].xxxx, TEMP[0].xxxx 109: ADD TEMP[6].x, TEMP[4].xxxx, -TEMP[6].xxxx 110: COS TEMP[7].x, TEMP[3].xxxx 111: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 112: SIN TEMP[8].x, TEMP[3].xxxx 113: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 114: MOV TEMP[7].y, TEMP[6].xxxx 115: ADD TEMP[6].xy, TEMP[7].xyyy, IMM[0].yyyy 116: MOV TEMP[6].xy, TEMP[6].xyyy 117: TEX TEMP[6], TEMP[6], SAMP[0], 2D 118: MAD TEMP[1], IN[0], TEMP[6], TEMP[1] 119: MUL TEMP[6].x, IMM[4].yyyy, TEMP[0].xxxx 120: ADD TEMP[6].x, TEMP[4].xxxx, -TEMP[6].xxxx 121: COS TEMP[7].x, TEMP[3].xxxx 122: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 123: SIN TEMP[8].x, TEMP[3].xxxx 124: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 125: MOV TEMP[7].y, TEMP[6].xxxx 126: ADD TEMP[6].xy, TEMP[7].xyyy, IMM[0].yyyy 127: MOV TEMP[6].xy, TEMP[6].xyyy 128: TEX TEMP[6], TEMP[6], SAMP[0], 2D 129: MAD TEMP[1], IN[0], TEMP[6], TEMP[1] 130: MUL TEMP[0].x, IMM[4].zzzz, TEMP[0].xxxx 131: ADD TEMP[0].x, TEMP[4].xxxx, -TEMP[0].xxxx 132: COS TEMP[6].x, TEMP[3].xxxx 133: MUL TEMP[6].x, TEMP[0].xxxx, TEMP[6].xxxx 134: SIN TEMP[3].x, TEMP[3].xxxx 135: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx 136: MOV TEMP[6].y, TEMP[0].xxxx 137: ADD TEMP[0].xy, TEMP[6].xyyy, IMM[0].yyyy 138: MOV TEMP[0].xy, TEMP[0].xyyy 139: TEX TEMP[0], TEMP[0], SAMP[0], 2D 140: MAD TEMP[1], IN[0], TEMP[0], TEMP[1] 141: MUL TEMP[1], TEMP[1], IMM[5].xxxx 142: ENDIF 143: DP3 TEMP[0].x, TEMP[1].xyzz, IMM[5].yyyy 144: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, TEMP[0].xxxx 145: ADD TEMP[0].x, IMM[0].zzzz, -TEMP[2].xxxx 146: MAD TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy, IMM[0].zzzz 147: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[0].xxxx 148: FSLT TEMP[0].x, CONST[2].xxxx, TEMP[4].xxxx 149: UIF TEMP[0].xxxx 150: FSLT TEMP[0].x, CONST[4].xxxx, IMM[5].zzzz 151: UIF TEMP[0].xxxx 152: ADD TEMP[0].x, IMM[0].zzzz, -TEMP[2].xxxx 153: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].xxxx 154: MAD TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy, IMM[0].zzzz 155: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 156: ELSE 157: ADD TEMP[0].x, IMM[0].zzzz, -TEMP[2].xxxx 158: MUL TEMP[0].x, CONST[4].xxxx, TEMP[0].xxxx 159: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].xxxx 160: MAD TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy, TEMP[1].xxxx 161: ENDIF 162: ENDIF 163: MOV OUT[0], TEMP[1] 164: END radeonsi: Compiling shader 32 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0 %25 = bitcast <2 x i32> %8 to <2 x float> %26 = extractelement <2 x float> %25, i32 0 %27 = extractelement <2 x float> %25, i32 1 %28 = call nsz float @llvm.amdgcn.interp.p1(float %26, i32 0, i32 1, i32 %6) #1 %29 = call nsz float @llvm.amdgcn.interp.p2(float %28, float %27, i32 0, i32 1, i32 %6) #1 %30 = bitcast <2 x i32> %8 to <2 x float> %31 = extractelement <2 x float> %30, i32 0 %32 = extractelement <2 x float> %30, i32 1 %33 = call nsz float @llvm.amdgcn.interp.p1(float %31, i32 1, i32 1, i32 %6) #1 %34 = call nsz float @llvm.amdgcn.interp.p2(float %33, float %32, i32 1, i32 1, i32 %6) #1 %35 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %36 = load <8 x i32>, <8 x i32> addrspace(2)* %35, align 32, !invariant.load !0 %37 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %38 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %37, i64 0, i64 3, !amdgpu.uniform !0 %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !invariant.load !0 %40 = bitcast float %29 to i32 %41 = bitcast float %34 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = bitcast <2 x i32> %43 to <2 x float> %45 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %44, <8 x i32> %36, <4 x i32> %39, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = bitcast <2 x i32> %8 to <2 x float> %51 = extractelement <2 x float> %50, i32 0 %52 = extractelement <2 x float> %50, i32 1 %53 = call nsz float @llvm.amdgcn.interp.p1(float %51, i32 0, i32 0, i32 %6) #1 %54 = call nsz float @llvm.amdgcn.interp.p2(float %53, float %52, i32 0, i32 0, i32 %6) #1 %55 = fmul nsz float %54, %46 %56 = bitcast <2 x i32> %8 to <2 x float> %57 = extractelement <2 x float> %56, i32 0 %58 = extractelement <2 x float> %56, i32 1 %59 = call nsz float @llvm.amdgcn.interp.p1(float %57, i32 1, i32 0, i32 %6) #1 %60 = call nsz float @llvm.amdgcn.interp.p2(float %59, float %58, i32 1, i32 0, i32 %6) #1 %61 = fmul nsz float %60, %47 %62 = bitcast <2 x i32> %8 to <2 x float> %63 = extractelement <2 x float> %62, i32 0 %64 = extractelement <2 x float> %62, i32 1 %65 = call nsz float @llvm.amdgcn.interp.p1(float %63, i32 2, i32 0, i32 %6) #1 %66 = call nsz float @llvm.amdgcn.interp.p2(float %65, float %64, i32 2, i32 0, i32 %6) #1 %67 = fmul nsz float %66, %48 %68 = bitcast <2 x i32> %8 to <2 x float> %69 = extractelement <2 x float> %68, i32 0 %70 = extractelement <2 x float> %68, i32 1 %71 = call nsz float @llvm.amdgcn.interp.p1(float %69, i32 3, i32 0, i32 %6) #1 %72 = call nsz float @llvm.amdgcn.interp.p2(float %71, float %70, i32 3, i32 0, i32 %6) #1 %73 = fmul nsz float %72, %49 %74 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !invariant.load !0 %76 = call nsz float @llvm.SI.load.const(<16 x i8> %75, i32 16) %77 = fmul nsz float %76, 2.000000e+00 %78 = call nsz float @llvm.fabs.f32(float %77) #3 %sqrtf = call float @sqrtf(float %78) #1 %fabsf = call float @fabsf(float %sqrtf) #1 %79 = fcmp oeq float %78, 0xFFF0000000000000 %80 = select i1 %79, float 0x7FF0000000000000, float %fabsf %81 = call nsz float @llvm.minnum.f32(float %80, float 1.000000e+00) #1 %82 = bitcast <2 x i32> %8 to <2 x float> %83 = extractelement <2 x float> %82, i32 0 %84 = extractelement <2 x float> %82, i32 1 %85 = call nsz float @llvm.amdgcn.interp.p1(float %83, i32 0, i32 1, i32 %6) #1 %86 = call nsz float @llvm.amdgcn.interp.p2(float %85, float %84, i32 0, i32 1, i32 %6) #1 %87 = fadd nsz float %86, -5.000000e-01 %88 = bitcast <2 x i32> %8 to <2 x float> %89 = extractelement <2 x float> %88, i32 0 %90 = extractelement <2 x float> %88, i32 1 %91 = call nsz float @llvm.amdgcn.interp.p1(float %89, i32 1, i32 1, i32 %6) #1 %92 = call nsz float @llvm.amdgcn.interp.p2(float %91, float %90, i32 1, i32 1, i32 %6) #1 %93 = fadd nsz float %92, -5.000000e-01 %94 = fmul nsz float %87, %87 %95 = fmul nsz float %93, %93 %96 = fadd nsz float %94, %95 %97 = call nsz float @llvm.sqrt.f32(float %96) #1 %98 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !invariant.load !0 %100 = call nsz float @llvm.SI.load.const(<16 x i8> %99, i32 32) %101 = fsub nsz float %97, %100 %102 = call nsz float @llvm.SI.load.const(<16 x i8> %99, i32 32) %103 = fsub nsz float 0x3FE6A09E60000000, %102 %104 = fdiv nsz float 1.000000e+00, %103, !fpmath !1 %105 = fmul nsz float %101, %104 %106 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !invariant.load !0 %108 = call nsz float @llvm.SI.load.const(<16 x i8> %107, i32 32) %109 = fcmp nsz olt float %108, %97 br i1 %109, label %if18, label %endif142 if18: ; preds = %main_body %110 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 48) %111 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 52) %112 = fadd nsz float %110, %111 %113 = fdiv nsz float 1.000000e+00, %112, !fpmath !1 %114 = fmul nsz float %113, 2.000000e+00 %115 = fdiv nsz float 1.000000e+00, %97, !fpmath !1 %116 = fmul nsz float %87, %115 %117 = fcmp nsz ogt float %116, 0.000000e+00 %118 = select i1 %117, float 1.000000e+00, float %116 %119 = fcmp nsz oge float %118, 0.000000e+00 %120 = select i1 %119, float %118, float -1.000000e+00 %121 = call nsz float @llvm.fabs.f32(float %116) #3 %122 = fsub nsz float 1.000000e+00, %121 %123 = call nsz float @llvm.sqrt.f32(float %122) #1 %124 = call nsz float @llvm.fabs.f32(float %116) #3 %125 = call nsz float @llvm.fabs.f32(float %116) #3 %126 = call nsz float @llvm.fabs.f32(float %116) #3 %127 = fmul nsz float %126, 0xBF98334BE0000000 %128 = fadd nsz float %127, 0x3FB4D1B0E0000000 %129 = fmul nsz float %125, %128 %130 = fadd nsz float %129, 0xBFCB781280000000 %131 = fmul nsz float %124, %130 %132 = fadd nsz float %131, 0x3FF921FB60000000 %133 = fmul nsz float %123, %132 %134 = fsub nsz float 0x3FF921FB60000000, %133 %135 = fmul nsz float %120, %134 %136 = fsub nsz float 0x3FF921FB60000000, %135 %137 = fcmp nsz ogt float %93, 0.000000e+00 %138 = select i1 %137, float 1.000000e+00, float %93 %139 = fcmp nsz oge float %138, 0.000000e+00 %140 = select i1 %139, float %138, float -1.000000e+00 %141 = fcmp nsz olt float %140, 0.000000e+00 %142 = fsub nsz float 0x401921E500000000, %136 %143 = select i1 %141, float %142, float %136 %144 = fmul nsz float %114, %105 %145 = fsub nsz float 0x3FF19999A0000000, %81 %146 = fmul nsz float %144, %145 %147 = fmul nsz float %146, 5.000000e+00 %148 = fsub nsz float %97, %147 %149 = call nsz float @llvm.cos.f32(float %143) #1 %150 = fmul nsz float %148, %149 %151 = call nsz float @llvm.sin.f32(float %143) #1 %152 = fmul nsz float %148, %151 %153 = fadd nsz float %150, 5.000000e-01 %154 = fadd nsz float %152, 5.000000e-01 %155 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %156 = load <8 x i32>, <8 x i32> addrspace(2)* %155, align 32, !invariant.load !0 %157 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %158 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %157, i64 0, i64 3, !amdgpu.uniform !0 %159 = load <4 x i32>, <4 x i32> addrspace(2)* %158, align 16, !invariant.load !0 %160 = bitcast float %153 to i32 %161 = bitcast float %154 to i32 %162 = insertelement <2 x i32> undef, i32 %160, i32 0 %163 = insertelement <2 x i32> %162, i32 %161, i32 1 %164 = bitcast <2 x i32> %163 to <2 x float> %165 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %164, <8 x i32> %156, <4 x i32> %159, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %166 = extractelement <4 x float> %165, i32 0 %167 = extractelement <4 x float> %165, i32 1 %168 = extractelement <4 x float> %165, i32 2 %169 = extractelement <4 x float> %165, i32 3 %170 = bitcast <2 x i32> %8 to <2 x float> %171 = extractelement <2 x float> %170, i32 0 %172 = extractelement <2 x float> %170, i32 1 %173 = call nsz float @llvm.amdgcn.interp.p1(float %171, i32 0, i32 0, i32 %6) #1 %174 = call nsz float @llvm.amdgcn.interp.p2(float %173, float %172, i32 0, i32 0, i32 %6) #1 %175 = fmul nsz float %174, %166 %176 = fadd nsz float %175, %55 %177 = bitcast <2 x i32> %8 to <2 x float> %178 = extractelement <2 x float> %177, i32 0 %179 = extractelement <2 x float> %177, i32 1 %180 = call nsz float @llvm.amdgcn.interp.p1(float %178, i32 1, i32 0, i32 %6) #1 %181 = call nsz float @llvm.amdgcn.interp.p2(float %180, float %179, i32 1, i32 0, i32 %6) #1 %182 = fmul nsz float %181, %167 %183 = fadd nsz float %182, %61 %184 = bitcast <2 x i32> %8 to <2 x float> %185 = extractelement <2 x float> %184, i32 0 %186 = extractelement <2 x float> %184, i32 1 %187 = call nsz float @llvm.amdgcn.interp.p1(float %185, i32 2, i32 0, i32 %6) #1 %188 = call nsz float @llvm.amdgcn.interp.p2(float %187, float %186, i32 2, i32 0, i32 %6) #1 %189 = fmul nsz float %188, %168 %190 = fadd nsz float %189, %67 %191 = bitcast <2 x i32> %8 to <2 x float> %192 = extractelement <2 x float> %191, i32 0 %193 = extractelement <2 x float> %191, i32 1 %194 = call nsz float @llvm.amdgcn.interp.p1(float %192, i32 3, i32 0, i32 %6) #1 %195 = call nsz float @llvm.amdgcn.interp.p2(float %194, float %193, i32 3, i32 0, i32 %6) #1 %196 = fmul nsz float %195, %169 %197 = fadd nsz float %196, %73 %198 = fmul nsz float %147, 2.000000e+00 %199 = fsub nsz float %97, %198 %200 = call nsz float @llvm.cos.f32(float %143) #1 %201 = fmul nsz float %199, %200 %202 = call nsz float @llvm.sin.f32(float %143) #1 %203 = fmul nsz float %199, %202 %204 = fadd nsz float %201, 5.000000e-01 %205 = fadd nsz float %203, 5.000000e-01 %206 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %207 = load <8 x i32>, <8 x i32> addrspace(2)* %206, align 32, !invariant.load !0 %208 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %209 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %208, i64 0, i64 3, !amdgpu.uniform !0 %210 = load <4 x i32>, <4 x i32> addrspace(2)* %209, align 16, !invariant.load !0 %211 = bitcast float %204 to i32 %212 = bitcast float %205 to i32 %213 = insertelement <2 x i32> undef, i32 %211, i32 0 %214 = insertelement <2 x i32> %213, i32 %212, i32 1 %215 = bitcast <2 x i32> %214 to <2 x float> %216 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %215, <8 x i32> %207, <4 x i32> %210, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %217 = extractelement <4 x float> %216, i32 0 %218 = extractelement <4 x float> %216, i32 1 %219 = extractelement <4 x float> %216, i32 2 %220 = extractelement <4 x float> %216, i32 3 %221 = bitcast <2 x i32> %8 to <2 x float> %222 = extractelement <2 x float> %221, i32 0 %223 = extractelement <2 x float> %221, i32 1 %224 = call nsz float @llvm.amdgcn.interp.p1(float %222, i32 0, i32 0, i32 %6) #1 %225 = call nsz float @llvm.amdgcn.interp.p2(float %224, float %223, i32 0, i32 0, i32 %6) #1 %226 = fmul nsz float %225, %217 %227 = fadd nsz float %226, %176 %228 = bitcast <2 x i32> %8 to <2 x float> %229 = extractelement <2 x float> %228, i32 0 %230 = extractelement <2 x float> %228, i32 1 %231 = call nsz float @llvm.amdgcn.interp.p1(float %229, i32 1, i32 0, i32 %6) #1 %232 = call nsz float @llvm.amdgcn.interp.p2(float %231, float %230, i32 1, i32 0, i32 %6) #1 %233 = fmul nsz float %232, %218 %234 = fadd nsz float %233, %183 %235 = bitcast <2 x i32> %8 to <2 x float> %236 = extractelement <2 x float> %235, i32 0 %237 = extractelement <2 x float> %235, i32 1 %238 = call nsz float @llvm.amdgcn.interp.p1(float %236, i32 2, i32 0, i32 %6) #1 %239 = call nsz float @llvm.amdgcn.interp.p2(float %238, float %237, i32 2, i32 0, i32 %6) #1 %240 = fmul nsz float %239, %219 %241 = fadd nsz float %240, %190 %242 = bitcast <2 x i32> %8 to <2 x float> %243 = extractelement <2 x float> %242, i32 0 %244 = extractelement <2 x float> %242, i32 1 %245 = call nsz float @llvm.amdgcn.interp.p1(float %243, i32 3, i32 0, i32 %6) #1 %246 = call nsz float @llvm.amdgcn.interp.p2(float %245, float %244, i32 3, i32 0, i32 %6) #1 %247 = fmul nsz float %246, %220 %248 = fadd nsz float %247, %197 %249 = fmul nsz float %147, 3.000000e+00 %250 = fsub nsz float %97, %249 %251 = call nsz float @llvm.cos.f32(float %143) #1 %252 = fmul nsz float %250, %251 %253 = call nsz float @llvm.sin.f32(float %143) #1 %254 = fmul nsz float %250, %253 %255 = fadd nsz float %252, 5.000000e-01 %256 = fadd nsz float %254, 5.000000e-01 %257 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %258 = load <8 x i32>, <8 x i32> addrspace(2)* %257, align 32, !invariant.load !0 %259 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %260 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %259, i64 0, i64 3, !amdgpu.uniform !0 %261 = load <4 x i32>, <4 x i32> addrspace(2)* %260, align 16, !invariant.load !0 %262 = bitcast float %255 to i32 %263 = bitcast float %256 to i32 %264 = insertelement <2 x i32> undef, i32 %262, i32 0 %265 = insertelement <2 x i32> %264, i32 %263, i32 1 %266 = bitcast <2 x i32> %265 to <2 x float> %267 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %266, <8 x i32> %258, <4 x i32> %261, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %268 = extractelement <4 x float> %267, i32 0 %269 = extractelement <4 x float> %267, i32 1 %270 = extractelement <4 x float> %267, i32 2 %271 = extractelement <4 x float> %267, i32 3 %272 = bitcast <2 x i32> %8 to <2 x float> %273 = extractelement <2 x float> %272, i32 0 %274 = extractelement <2 x float> %272, i32 1 %275 = call nsz float @llvm.amdgcn.interp.p1(float %273, i32 0, i32 0, i32 %6) #1 %276 = call nsz float @llvm.amdgcn.interp.p2(float %275, float %274, i32 0, i32 0, i32 %6) #1 %277 = fmul nsz float %276, %268 %278 = fadd nsz float %277, %227 %279 = bitcast <2 x i32> %8 to <2 x float> %280 = extractelement <2 x float> %279, i32 0 %281 = extractelement <2 x float> %279, i32 1 %282 = call nsz float @llvm.amdgcn.interp.p1(float %280, i32 1, i32 0, i32 %6) #1 %283 = call nsz float @llvm.amdgcn.interp.p2(float %282, float %281, i32 1, i32 0, i32 %6) #1 %284 = fmul nsz float %283, %269 %285 = fadd nsz float %284, %234 %286 = bitcast <2 x i32> %8 to <2 x float> %287 = extractelement <2 x float> %286, i32 0 %288 = extractelement <2 x float> %286, i32 1 %289 = call nsz float @llvm.amdgcn.interp.p1(float %287, i32 2, i32 0, i32 %6) #1 %290 = call nsz float @llvm.amdgcn.interp.p2(float %289, float %288, i32 2, i32 0, i32 %6) #1 %291 = fmul nsz float %290, %270 %292 = fadd nsz float %291, %241 %293 = bitcast <2 x i32> %8 to <2 x float> %294 = extractelement <2 x float> %293, i32 0 %295 = extractelement <2 x float> %293, i32 1 %296 = call nsz float @llvm.amdgcn.interp.p1(float %294, i32 3, i32 0, i32 %6) #1 %297 = call nsz float @llvm.amdgcn.interp.p2(float %296, float %295, i32 3, i32 0, i32 %6) #1 %298 = fmul nsz float %297, %271 %299 = fadd nsz float %298, %248 %300 = fmul nsz float %147, 4.000000e+00 %301 = fsub nsz float %97, %300 %302 = call nsz float @llvm.cos.f32(float %143) #1 %303 = fmul nsz float %301, %302 %304 = call nsz float @llvm.sin.f32(float %143) #1 %305 = fmul nsz float %301, %304 %306 = fadd nsz float %303, 5.000000e-01 %307 = fadd nsz float %305, 5.000000e-01 %308 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %309 = load <8 x i32>, <8 x i32> addrspace(2)* %308, align 32, !invariant.load !0 %310 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %311 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %310, i64 0, i64 3, !amdgpu.uniform !0 %312 = load <4 x i32>, <4 x i32> addrspace(2)* %311, align 16, !invariant.load !0 %313 = bitcast float %306 to i32 %314 = bitcast float %307 to i32 %315 = insertelement <2 x i32> undef, i32 %313, i32 0 %316 = insertelement <2 x i32> %315, i32 %314, i32 1 %317 = bitcast <2 x i32> %316 to <2 x float> %318 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %317, <8 x i32> %309, <4 x i32> %312, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %319 = extractelement <4 x float> %318, i32 0 %320 = extractelement <4 x float> %318, i32 1 %321 = extractelement <4 x float> %318, i32 2 %322 = extractelement <4 x float> %318, i32 3 %323 = bitcast <2 x i32> %8 to <2 x float> %324 = extractelement <2 x float> %323, i32 0 %325 = extractelement <2 x float> %323, i32 1 %326 = call nsz float @llvm.amdgcn.interp.p1(float %324, i32 0, i32 0, i32 %6) #1 %327 = call nsz float @llvm.amdgcn.interp.p2(float %326, float %325, i32 0, i32 0, i32 %6) #1 %328 = fmul nsz float %327, %319 %329 = fadd nsz float %328, %278 %330 = bitcast <2 x i32> %8 to <2 x float> %331 = extractelement <2 x float> %330, i32 0 %332 = extractelement <2 x float> %330, i32 1 %333 = call nsz float @llvm.amdgcn.interp.p1(float %331, i32 1, i32 0, i32 %6) #1 %334 = call nsz float @llvm.amdgcn.interp.p2(float %333, float %332, i32 1, i32 0, i32 %6) #1 %335 = fmul nsz float %334, %320 %336 = fadd nsz float %335, %285 %337 = bitcast <2 x i32> %8 to <2 x float> %338 = extractelement <2 x float> %337, i32 0 %339 = extractelement <2 x float> %337, i32 1 %340 = call nsz float @llvm.amdgcn.interp.p1(float %338, i32 2, i32 0, i32 %6) #1 %341 = call nsz float @llvm.amdgcn.interp.p2(float %340, float %339, i32 2, i32 0, i32 %6) #1 %342 = fmul nsz float %341, %321 %343 = fadd nsz float %342, %292 %344 = bitcast <2 x i32> %8 to <2 x float> %345 = extractelement <2 x float> %344, i32 0 %346 = extractelement <2 x float> %344, i32 1 %347 = call nsz float @llvm.amdgcn.interp.p1(float %345, i32 3, i32 0, i32 %6) #1 %348 = call nsz float @llvm.amdgcn.interp.p2(float %347, float %346, i32 3, i32 0, i32 %6) #1 %349 = fmul nsz float %348, %322 %350 = fadd nsz float %349, %299 %351 = fmul nsz float %147, 5.000000e+00 %352 = fsub nsz float %97, %351 %353 = call nsz float @llvm.cos.f32(float %143) #1 %354 = fmul nsz float %352, %353 %355 = call nsz float @llvm.sin.f32(float %143) #1 %356 = fmul nsz float %352, %355 %357 = fadd nsz float %354, 5.000000e-01 %358 = fadd nsz float %356, 5.000000e-01 %359 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %360 = load <8 x i32>, <8 x i32> addrspace(2)* %359, align 32, !invariant.load !0 %361 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %362 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %361, i64 0, i64 3, !amdgpu.uniform !0 %363 = load <4 x i32>, <4 x i32> addrspace(2)* %362, align 16, !invariant.load !0 %364 = bitcast float %357 to i32 %365 = bitcast float %358 to i32 %366 = insertelement <2 x i32> undef, i32 %364, i32 0 %367 = insertelement <2 x i32> %366, i32 %365, i32 1 %368 = bitcast <2 x i32> %367 to <2 x float> %369 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %368, <8 x i32> %360, <4 x i32> %363, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %370 = extractelement <4 x float> %369, i32 0 %371 = extractelement <4 x float> %369, i32 1 %372 = extractelement <4 x float> %369, i32 2 %373 = extractelement <4 x float> %369, i32 3 %374 = bitcast <2 x i32> %8 to <2 x float> %375 = extractelement <2 x float> %374, i32 0 %376 = extractelement <2 x float> %374, i32 1 %377 = call nsz float @llvm.amdgcn.interp.p1(float %375, i32 0, i32 0, i32 %6) #1 %378 = call nsz float @llvm.amdgcn.interp.p2(float %377, float %376, i32 0, i32 0, i32 %6) #1 %379 = fmul nsz float %378, %370 %380 = fadd nsz float %379, %329 %381 = bitcast <2 x i32> %8 to <2 x float> %382 = extractelement <2 x float> %381, i32 0 %383 = extractelement <2 x float> %381, i32 1 %384 = call nsz float @llvm.amdgcn.interp.p1(float %382, i32 1, i32 0, i32 %6) #1 %385 = call nsz float @llvm.amdgcn.interp.p2(float %384, float %383, i32 1, i32 0, i32 %6) #1 %386 = fmul nsz float %385, %371 %387 = fadd nsz float %386, %336 %388 = bitcast <2 x i32> %8 to <2 x float> %389 = extractelement <2 x float> %388, i32 0 %390 = extractelement <2 x float> %388, i32 1 %391 = call nsz float @llvm.amdgcn.interp.p1(float %389, i32 2, i32 0, i32 %6) #1 %392 = call nsz float @llvm.amdgcn.interp.p2(float %391, float %390, i32 2, i32 0, i32 %6) #1 %393 = fmul nsz float %392, %372 %394 = fadd nsz float %393, %343 %395 = bitcast <2 x i32> %8 to <2 x float> %396 = extractelement <2 x float> %395, i32 0 %397 = extractelement <2 x float> %395, i32 1 %398 = call nsz float @llvm.amdgcn.interp.p1(float %396, i32 3, i32 0, i32 %6) #1 %399 = call nsz float @llvm.amdgcn.interp.p2(float %398, float %397, i32 3, i32 0, i32 %6) #1 %400 = fmul nsz float %399, %373 %401 = fadd nsz float %400, %350 %402 = fmul nsz float %147, 6.000000e+00 %403 = fsub nsz float %97, %402 %404 = call nsz float @llvm.cos.f32(float %143) #1 %405 = fmul nsz float %403, %404 %406 = call nsz float @llvm.sin.f32(float %143) #1 %407 = fmul nsz float %403, %406 %408 = fadd nsz float %405, 5.000000e-01 %409 = fadd nsz float %407, 5.000000e-01 %410 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %411 = load <8 x i32>, <8 x i32> addrspace(2)* %410, align 32, !invariant.load !0 %412 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %413 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %412, i64 0, i64 3, !amdgpu.uniform !0 %414 = load <4 x i32>, <4 x i32> addrspace(2)* %413, align 16, !invariant.load !0 %415 = bitcast float %408 to i32 %416 = bitcast float %409 to i32 %417 = insertelement <2 x i32> undef, i32 %415, i32 0 %418 = insertelement <2 x i32> %417, i32 %416, i32 1 %419 = bitcast <2 x i32> %418 to <2 x float> %420 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %419, <8 x i32> %411, <4 x i32> %414, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %421 = extractelement <4 x float> %420, i32 0 %422 = extractelement <4 x float> %420, i32 1 %423 = extractelement <4 x float> %420, i32 2 %424 = extractelement <4 x float> %420, i32 3 %425 = bitcast <2 x i32> %8 to <2 x float> %426 = extractelement <2 x float> %425, i32 0 %427 = extractelement <2 x float> %425, i32 1 %428 = call nsz float @llvm.amdgcn.interp.p1(float %426, i32 0, i32 0, i32 %6) #1 %429 = call nsz float @llvm.amdgcn.interp.p2(float %428, float %427, i32 0, i32 0, i32 %6) #1 %430 = fmul nsz float %429, %421 %431 = fadd nsz float %430, %380 %432 = bitcast <2 x i32> %8 to <2 x float> %433 = extractelement <2 x float> %432, i32 0 %434 = extractelement <2 x float> %432, i32 1 %435 = call nsz float @llvm.amdgcn.interp.p1(float %433, i32 1, i32 0, i32 %6) #1 %436 = call nsz float @llvm.amdgcn.interp.p2(float %435, float %434, i32 1, i32 0, i32 %6) #1 %437 = fmul nsz float %436, %422 %438 = fadd nsz float %437, %387 %439 = bitcast <2 x i32> %8 to <2 x float> %440 = extractelement <2 x float> %439, i32 0 %441 = extractelement <2 x float> %439, i32 1 %442 = call nsz float @llvm.amdgcn.interp.p1(float %440, i32 2, i32 0, i32 %6) #1 %443 = call nsz float @llvm.amdgcn.interp.p2(float %442, float %441, i32 2, i32 0, i32 %6) #1 %444 = fmul nsz float %443, %423 %445 = fadd nsz float %444, %394 %446 = bitcast <2 x i32> %8 to <2 x float> %447 = extractelement <2 x float> %446, i32 0 %448 = extractelement <2 x float> %446, i32 1 %449 = call nsz float @llvm.amdgcn.interp.p1(float %447, i32 3, i32 0, i32 %6) #1 %450 = call nsz float @llvm.amdgcn.interp.p2(float %449, float %448, i32 3, i32 0, i32 %6) #1 %451 = fmul nsz float %450, %424 %452 = fadd nsz float %451, %401 %453 = fmul nsz float %147, 7.000000e+00 %454 = fsub nsz float %97, %453 %455 = call nsz float @llvm.cos.f32(float %143) #1 %456 = fmul nsz float %454, %455 %457 = call nsz float @llvm.sin.f32(float %143) #1 %458 = fmul nsz float %454, %457 %459 = fadd nsz float %456, 5.000000e-01 %460 = fadd nsz float %458, 5.000000e-01 %461 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %462 = load <8 x i32>, <8 x i32> addrspace(2)* %461, align 32, !invariant.load !0 %463 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %464 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %463, i64 0, i64 3, !amdgpu.uniform !0 %465 = load <4 x i32>, <4 x i32> addrspace(2)* %464, align 16, !invariant.load !0 %466 = bitcast float %459 to i32 %467 = bitcast float %460 to i32 %468 = insertelement <2 x i32> undef, i32 %466, i32 0 %469 = insertelement <2 x i32> %468, i32 %467, i32 1 %470 = bitcast <2 x i32> %469 to <2 x float> %471 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %470, <8 x i32> %462, <4 x i32> %465, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %472 = extractelement <4 x float> %471, i32 0 %473 = extractelement <4 x float> %471, i32 1 %474 = extractelement <4 x float> %471, i32 2 %475 = extractelement <4 x float> %471, i32 3 %476 = bitcast <2 x i32> %8 to <2 x float> %477 = extractelement <2 x float> %476, i32 0 %478 = extractelement <2 x float> %476, i32 1 %479 = call nsz float @llvm.amdgcn.interp.p1(float %477, i32 0, i32 0, i32 %6) #1 %480 = call nsz float @llvm.amdgcn.interp.p2(float %479, float %478, i32 0, i32 0, i32 %6) #1 %481 = fmul nsz float %480, %472 %482 = fadd nsz float %481, %431 %483 = bitcast <2 x i32> %8 to <2 x float> %484 = extractelement <2 x float> %483, i32 0 %485 = extractelement <2 x float> %483, i32 1 %486 = call nsz float @llvm.amdgcn.interp.p1(float %484, i32 1, i32 0, i32 %6) #1 %487 = call nsz float @llvm.amdgcn.interp.p2(float %486, float %485, i32 1, i32 0, i32 %6) #1 %488 = fmul nsz float %487, %473 %489 = fadd nsz float %488, %438 %490 = bitcast <2 x i32> %8 to <2 x float> %491 = extractelement <2 x float> %490, i32 0 %492 = extractelement <2 x float> %490, i32 1 %493 = call nsz float @llvm.amdgcn.interp.p1(float %491, i32 2, i32 0, i32 %6) #1 %494 = call nsz float @llvm.amdgcn.interp.p2(float %493, float %492, i32 2, i32 0, i32 %6) #1 %495 = fmul nsz float %494, %474 %496 = fadd nsz float %495, %445 %497 = bitcast <2 x i32> %8 to <2 x float> %498 = extractelement <2 x float> %497, i32 0 %499 = extractelement <2 x float> %497, i32 1 %500 = call nsz float @llvm.amdgcn.interp.p1(float %498, i32 3, i32 0, i32 %6) #1 %501 = call nsz float @llvm.amdgcn.interp.p2(float %500, float %499, i32 3, i32 0, i32 %6) #1 %502 = fmul nsz float %501, %475 %503 = fadd nsz float %502, %452 %504 = fmul nsz float %147, 8.000000e+00 %505 = fsub nsz float %97, %504 %506 = call nsz float @llvm.cos.f32(float %143) #1 %507 = fmul nsz float %505, %506 %508 = call nsz float @llvm.sin.f32(float %143) #1 %509 = fmul nsz float %505, %508 %510 = fadd nsz float %507, 5.000000e-01 %511 = fadd nsz float %509, 5.000000e-01 %512 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %513 = load <8 x i32>, <8 x i32> addrspace(2)* %512, align 32, !invariant.load !0 %514 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %515 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %514, i64 0, i64 3, !amdgpu.uniform !0 %516 = load <4 x i32>, <4 x i32> addrspace(2)* %515, align 16, !invariant.load !0 %517 = bitcast float %510 to i32 %518 = bitcast float %511 to i32 %519 = insertelement <2 x i32> undef, i32 %517, i32 0 %520 = insertelement <2 x i32> %519, i32 %518, i32 1 %521 = bitcast <2 x i32> %520 to <2 x float> %522 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %521, <8 x i32> %513, <4 x i32> %516, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %523 = extractelement <4 x float> %522, i32 0 %524 = extractelement <4 x float> %522, i32 1 %525 = extractelement <4 x float> %522, i32 2 %526 = extractelement <4 x float> %522, i32 3 %527 = bitcast <2 x i32> %8 to <2 x float> %528 = extractelement <2 x float> %527, i32 0 %529 = extractelement <2 x float> %527, i32 1 %530 = call nsz float @llvm.amdgcn.interp.p1(float %528, i32 0, i32 0, i32 %6) #1 %531 = call nsz float @llvm.amdgcn.interp.p2(float %530, float %529, i32 0, i32 0, i32 %6) #1 %532 = fmul nsz float %531, %523 %533 = fadd nsz float %532, %482 %534 = bitcast <2 x i32> %8 to <2 x float> %535 = extractelement <2 x float> %534, i32 0 %536 = extractelement <2 x float> %534, i32 1 %537 = call nsz float @llvm.amdgcn.interp.p1(float %535, i32 1, i32 0, i32 %6) #1 %538 = call nsz float @llvm.amdgcn.interp.p2(float %537, float %536, i32 1, i32 0, i32 %6) #1 %539 = fmul nsz float %538, %524 %540 = fadd nsz float %539, %489 %541 = bitcast <2 x i32> %8 to <2 x float> %542 = extractelement <2 x float> %541, i32 0 %543 = extractelement <2 x float> %541, i32 1 %544 = call nsz float @llvm.amdgcn.interp.p1(float %542, i32 2, i32 0, i32 %6) #1 %545 = call nsz float @llvm.amdgcn.interp.p2(float %544, float %543, i32 2, i32 0, i32 %6) #1 %546 = fmul nsz float %545, %525 %547 = fadd nsz float %546, %496 %548 = bitcast <2 x i32> %8 to <2 x float> %549 = extractelement <2 x float> %548, i32 0 %550 = extractelement <2 x float> %548, i32 1 %551 = call nsz float @llvm.amdgcn.interp.p1(float %549, i32 3, i32 0, i32 %6) #1 %552 = call nsz float @llvm.amdgcn.interp.p2(float %551, float %550, i32 3, i32 0, i32 %6) #1 %553 = fmul nsz float %552, %526 %554 = fadd nsz float %553, %503 %555 = fmul nsz float %147, 9.000000e+00 %556 = fsub nsz float %97, %555 %557 = call nsz float @llvm.cos.f32(float %143) #1 %558 = fmul nsz float %556, %557 %559 = call nsz float @llvm.sin.f32(float %143) #1 %560 = fmul nsz float %556, %559 %561 = fadd nsz float %558, 5.000000e-01 %562 = fadd nsz float %560, 5.000000e-01 %563 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %564 = load <8 x i32>, <8 x i32> addrspace(2)* %563, align 32, !invariant.load !0 %565 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %566 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %565, i64 0, i64 3, !amdgpu.uniform !0 %567 = load <4 x i32>, <4 x i32> addrspace(2)* %566, align 16, !invariant.load !0 %568 = bitcast float %561 to i32 %569 = bitcast float %562 to i32 %570 = insertelement <2 x i32> undef, i32 %568, i32 0 %571 = insertelement <2 x i32> %570, i32 %569, i32 1 %572 = bitcast <2 x i32> %571 to <2 x float> %573 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %572, <8 x i32> %564, <4 x i32> %567, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %574 = extractelement <4 x float> %573, i32 0 %575 = extractelement <4 x float> %573, i32 1 %576 = extractelement <4 x float> %573, i32 2 %577 = extractelement <4 x float> %573, i32 3 %578 = bitcast <2 x i32> %8 to <2 x float> %579 = extractelement <2 x float> %578, i32 0 %580 = extractelement <2 x float> %578, i32 1 %581 = call nsz float @llvm.amdgcn.interp.p1(float %579, i32 0, i32 0, i32 %6) #1 %582 = call nsz float @llvm.amdgcn.interp.p2(float %581, float %580, i32 0, i32 0, i32 %6) #1 %583 = fmul nsz float %582, %574 %584 = fadd nsz float %583, %533 %585 = bitcast <2 x i32> %8 to <2 x float> %586 = extractelement <2 x float> %585, i32 0 %587 = extractelement <2 x float> %585, i32 1 %588 = call nsz float @llvm.amdgcn.interp.p1(float %586, i32 1, i32 0, i32 %6) #1 %589 = call nsz float @llvm.amdgcn.interp.p2(float %588, float %587, i32 1, i32 0, i32 %6) #1 %590 = fmul nsz float %589, %575 %591 = fadd nsz float %590, %540 %592 = bitcast <2 x i32> %8 to <2 x float> %593 = extractelement <2 x float> %592, i32 0 %594 = extractelement <2 x float> %592, i32 1 %595 = call nsz float @llvm.amdgcn.interp.p1(float %593, i32 2, i32 0, i32 %6) #1 %596 = call nsz float @llvm.amdgcn.interp.p2(float %595, float %594, i32 2, i32 0, i32 %6) #1 %597 = fmul nsz float %596, %576 %598 = fadd nsz float %597, %547 %599 = bitcast <2 x i32> %8 to <2 x float> %600 = extractelement <2 x float> %599, i32 0 %601 = extractelement <2 x float> %599, i32 1 %602 = call nsz float @llvm.amdgcn.interp.p1(float %600, i32 3, i32 0, i32 %6) #1 %603 = call nsz float @llvm.amdgcn.interp.p2(float %602, float %601, i32 3, i32 0, i32 %6) #1 %604 = fmul nsz float %603, %577 %605 = fadd nsz float %604, %554 %606 = fmul nsz float %584, 0x3FB99999A0000000 %607 = fmul nsz float %591, 0x3FB99999A0000000 %608 = fmul nsz float %598, 0x3FB99999A0000000 %609 = fmul nsz float %605, 0x3FB99999A0000000 br label %endif142 endif142: ; preds = %if18, %main_body %.04 = phi float [ %609, %if18 ], [ %73, %main_body ] %.03 = phi float [ %608, %if18 ], [ %67, %main_body ] %.02 = phi float [ %607, %if18 ], [ %61, %main_body ] %.0 = phi float [ %606, %if18 ], [ %55, %main_body ] %610 = fmul nsz float %.0, 0x3FD51EB860000000 %611 = fmul nsz float %.02, 0x3FD51EB860000000 %612 = fadd nsz float %611, %610 %613 = fmul nsz float %.03, 0x3FD51EB860000000 %614 = fadd nsz float %612, %613 %615 = fsub nsz float 1.000000e+00, %81 %616 = fmul nsz float %.0, %81 %617 = fmul nsz float %614, %615 %618 = fadd nsz float %616, %617 %619 = fsub nsz float 1.000000e+00, %81 %620 = fmul nsz float %.02, %81 %621 = fmul nsz float %614, %619 %622 = fadd nsz float %620, %621 %623 = fsub nsz float 1.000000e+00, %81 %624 = fmul nsz float %.03, %81 %625 = fmul nsz float %614, %623 %626 = fadd nsz float %624, %625 %627 = fsub nsz float 1.000000e+00, %81 %628 = fmul nsz float %627, 5.000000e-01 %629 = fadd nsz float %628, 1.000000e+00 %630 = fmul nsz float %618, %629 %631 = fmul nsz float %622, %629 %632 = fmul nsz float %626, %629 %633 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %634 = load <16 x i8>, <16 x i8> addrspace(2)* %633, align 16, !invariant.load !0 %635 = call nsz float @llvm.SI.load.const(<16 x i8> %634, i32 32) %636 = fcmp nsz olt float %635, %97 br i1 %636, label %if149, label %endif162 if149: ; preds = %endif142 %637 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %638 = load <16 x i8>, <16 x i8> addrspace(2)* %637, align 16, !invariant.load !0 %639 = call nsz float @llvm.SI.load.const(<16 x i8> %638, i32 64) %640 = fcmp nsz olt float %639, 0x3FE6666660000000 %641 = fsub nsz float 1.000000e+00, %81 br i1 %640, label %if151, label %else156 if151: ; preds = %if149 %642 = fmul nsz float %641, %105 %643 = fmul nsz float %642, 5.000000e-01 %644 = fadd nsz float %643, 1.000000e+00 %645 = fmul nsz float %630, %644 br label %endif162 else156: ; preds = %if149 %646 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %647 = load <16 x i8>, <16 x i8> addrspace(2)* %646, align 16, !invariant.load !0 %648 = call nsz float @llvm.SI.load.const(<16 x i8> %647, i32 64) %649 = fmul nsz float %648, %641 %650 = fmul nsz float %649, %105 %651 = fmul nsz float %650, 5.000000e-01 %652 = fadd nsz float %651, %630 br label %endif162 endif162: ; preds = %if151, %else156, %endif142 %.2 = phi float [ %630, %endif142 ], [ %645, %if151 ], [ %652, %else156 ] %653 = bitcast float %5 to i32 %654 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %653, 10 %655 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %654, float %.2, 11 %656 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %655, float %631, 12 %657 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %656, float %632, 13 %658 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %657, float %.04, 14 %659 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %658, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %659 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 declare float @sqrtf(float) declare float @fabsf(float) attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } attributes #3 = { nounwind } !0 = !{} !1 = !{float 2.500000e+00} Total memory used = 48789883(0x02e8797b) bytes Texture #3 2048,1536 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0] 0: TEX TEMP[0], IN[0], SAMP[0], 2D 1: MOV OUT[0], TEMP[0] 2: END radeonsi: Compiling shader 33 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !invariant.load !0 %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3, !amdgpu.uniform !0 %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0 %28 = bitcast <2 x i32> %12 to <2 x float> %29 = extractelement <2 x float> %28, i32 0 %30 = extractelement <2 x float> %28, i32 1 %31 = call nsz float @llvm.amdgcn.interp.p1(float %29, i32 0, i32 0, i32 %6) #1 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %30, i32 0, i32 0, i32 %6) #1 %33 = bitcast <2 x i32> %12 to <2 x float> %34 = extractelement <2 x float> %33, i32 0 %35 = extractelement <2 x float> %33, i32 1 %36 = call nsz float @llvm.amdgcn.interp.p1(float %34, i32 1, i32 0, i32 %6) #1 %37 = call nsz float @llvm.amdgcn.interp.p2(float %36, float %35, i32 1, i32 0, i32 %6) #1 %38 = bitcast float %32 to i32 %39 = bitcast float %37 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = bitcast <2 x i32> %41 to <2 x float> %43 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %42, <8 x i32> %24, <4 x i32> %27, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = bitcast float %5 to i32 %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %48, 10 %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49, float %44, 11 %51 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %50, float %45, 12 %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %51, float %46, 13 %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52, float %47, 14 %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54 } ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } !0 = !{} SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[6:7], exec ; BE86017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C00E0302 00000000 s_load_dwordx4 s[0:3], s[4:5], 0x30 ; C00A0002 00000030 s_mov_b32 m0, s11 ; BEFC000B v_interp_p1_f32 v0, v8, attr0.x ; D4000008 v_interp_p1_f32 v1, v8, attr0.y ; D4040108 v_interp_p2_f32 v0, v9, attr0.x ; D4010009 v_interp_p2_f32 v1, v9, attr0.y ; D4050109 s_and_b64 exec, exec, s[6:7] ; 86FE067E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000 s_waitcnt vmcnt(0) ; BF8C0F70 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 exp mrt0 v0, v1, v0, v0 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 92 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** radeonsi: Compiling shader 34 Vertex Shader Prolog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> @vs_prolog(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> undef, i32 %0, 0 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %20, i32 %1, 1 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %21, i32 %2, 2 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %22, i32 %3, 3 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %23, i32 %4, 4 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %24, i32 %5, 5 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %25, i32 %6, 6 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %26, i32 %7, 7 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %27, i32 %8, 8 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %28, i32 %9, 9 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %29, i32 %10, 10 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %30, i32 %11, 11 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %31, i32 %12, 12 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %32, i32 %13, 13 %34 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %33, i32 %14, 14 %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %34, i32 %15, 15 %36 = bitcast i32 %16 to float %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %35, float %36, 16 %38 = bitcast i32 %17 to float %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %37, float %38, 17 %40 = bitcast i32 %18 to float %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %39, float %40, 18 %42 = bitcast i32 %19 to float %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %41, float %42, 19 %44 = add i32 %16, %12 %45 = bitcast i32 %44 to float %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %43, float %45, 20 %47 = add i32 %16, %12 %48 = bitcast i32 %47 to float %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %46, float %48, 21 %50 = add i32 %16, %12 %51 = bitcast i32 %50 to float %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %49, float %51, 22 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %52 } attributes #0 = { "no-signed-zeros-fp-math"="true" } SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} part.vs.epilog.export_prim_id = 0 as_es = 0 as_ls = 0 mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} opt.hw_vs.kill_outputs = 0x0 opt.hw_vs.kill_outputs2 = 0x0 opt.hw_vs.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 3208000C v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 Shader main disassembly: s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C00A0105 00000000 s_load_dwordx4 s[12:15], s[10:11], 0x10 ; C00A0305 00000010 s_load_dwordx4 s[8:11], s[10:11], 0x20 ; C00A0205 00000020 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C00A0001 00000000 v_mov_b32_e32 v0, 0 ; 7E000280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v4, s[4:7], 0 idxen ; E00C2000 80010704 s_waitcnt vmcnt(0) ; BF8C0F70 buffer_load_format_xyzw v[10:13], v5, s[12:15], 0 idxen ; E00C2000 80030A05 s_nop 0 ; BF800000 buffer_load_format_xyzw v[3:6], v6, s[8:11], 0 idxen ; E00C2000 80020306 s_buffer_load_dword s4, s[0:3], 0x100 ; C0220100 00000100 s_buffer_load_dword s5, s[0:3], 0x104 ; C0220140 00000104 s_buffer_load_dword s6, s[0:3], 0x108 ; C0220180 00000108 s_buffer_load_dword s7, s[0:3], 0x10c ; C02201C0 0000010C s_buffer_load_dword s8, s[0:3], 0x110 ; C0220200 00000110 s_buffer_load_dword s9, s[0:3], 0x114 ; C0220240 00000114 s_buffer_load_dword s10, s[0:3], 0x118 ; C0220280 00000118 s_buffer_load_dword s11, s[0:3], 0x11c ; C02202C0 0000011C s_buffer_load_dword s12, s[0:3], 0x120 ; C0220300 00000120 s_buffer_load_dword s13, s[0:3], 0x124 ; C0220340 00000124 s_buffer_load_dword s14, s[0:3], 0x128 ; C0220380 00000128 s_buffer_load_dword s15, s[0:3], 0x12c ; C02203C0 0000012C s_buffer_load_dword s16, s[0:3], 0x130 ; C0220400 00000130 s_buffer_load_dword s17, s[0:3], 0x134 ; C0220440 00000134 s_buffer_load_dword s18, s[0:3], 0x138 ; C0220480 00000138 s_buffer_load_dword s0, s[0:3], 0x13c ; C0220000 0000013C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v7 ; 0A020E04 s_waitcnt vmcnt(1) ; BF8C0F71 v_mul_f32_e32 v12, s5, v7 ; 0A180E05 v_mul_f32_e32 v13, s6, v7 ; 0A1A0E06 v_mul_f32_e32 v7, s7, v7 ; 0A0E0E07 v_mac_f32_e32 v1, s8, v8 ; 2C021008 v_mac_f32_e32 v12, s9, v8 ; 2C181009 v_mac_f32_e32 v13, s10, v8 ; 2C1A100A v_mac_f32_e32 v7, s11, v8 ; 2C0E100B v_mac_f32_e32 v1, s12, v9 ; 2C02120C v_mac_f32_e32 v12, s13, v9 ; 2C18120D v_mac_f32_e32 v13, s14, v9 ; 2C1A120E v_mac_f32_e32 v7, s15, v9 ; 2C0E120F s_waitcnt vmcnt(0) ; BF8C0F70 exp param0 v3, v4, v5, v6 ; C400020F 06050403 v_add_f32_e32 v1, s16, v1 ; 02020210 s_waitcnt expcnt(0) ; BF8C0F0F v_add_f32_e32 v3, s17, v12 ; 02061811 v_add_f32_e32 v4, s18, v13 ; 02081A12 v_add_f32_e32 v5, s0, v7 ; 020A0E00 exp param1 v10, v11, v0, v0 ; C400021F 00000B0A exp pos0 v1, v3, v4, v5 ; C40000CF 05040301 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp pos1 v1, off, off, off done ; C40008D1 00000001 s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 340 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[6:7], exec ; BE86017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C00E0302 00000000 s_load_dwordx4 s[0:3], s[4:5], 0x30 ; C00A0002 00000030 s_mov_b32 m0, s11 ; BEFC000B v_interp_p1_f32 v0, v2, attr1.x ; D4000402 v_interp_p1_f32 v1, v2, attr1.y ; D4040502 v_interp_p2_f32 v0, v3, attr1.x ; D4010403 v_interp_p2_f32 v1, v3, attr1.y ; D4050503 s_and_b64 exec, exec, s[6:7] ; 86FE067E v_interp_p1_f32 v4, v2, attr0.z ; D4100202 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[5:8], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030500 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p1_f32 v1, v2, attr0.y ; D4040102 v_interp_p2_f32 v4, v3, attr0.z ; D4110203 v_interp_p1_f32 v2, v2, attr0.w ; D4080302 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 v_interp_p2_f32 v1, v3, attr0.y ; D4050103 v_interp_p2_f32 v2, v3, attr0.w ; D4090303 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v4, v7, v4 ; 0A080907 v_mul_f32_e32 v3, v8, v2 ; 0A060508 v_mul_f32_e32 v0, v5, v0 ; 0A000105 v_mul_f32_e32 v1, v6, v1 ; 0A020306 v_mov_b32_e32 v2, v4 ; 7E040304 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 exp mrt0 v0, v1, v0, v0 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 144 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** ===== Scanning for languages ======= Found campaign: 1 line 31 Done finding languages Set Fullscreen 1 PLATFORM IDX: 3 Init line 133 Texture #3 1024,2048 Audio setting channel count to 12 Audio setting channel count to 20 VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ./run.sh: line 3: 11124 Segmentation fault (core dumped) LD_LIBRARY_PATH=./lib:$LD_LIBRARY_PATH ./runner