GLSL excerpt: [vo/opengl] [ 1] color = fract(gl_FragCoord.y / 2) < 0.5 [vo/opengl] [ 2] ? texture(texture0, texcoord0) [vo/opengl] [ 3] : texture(texture1, texcoord0); non-working: FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL SV[0], POSITION DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[2] DCL TEMP[0] DCL TEMP[1..2], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], SV[0] 1: MAD TEMP[0].y, SV[0], CONST[2].xxxx, CONST[2].yyyy 2: MUL TEMP[1].x, TEMP[0].yyyy, IMM[0].xxxx 3: FRC TEMP[1].x, TEMP[1].xxxx 4: FSLT TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 5: UIF TEMP[1].xxxx :2 6: MOV TEMP[1].xy, IN[0].xyyy 7: TEX TEMP[1], TEMP[1], SAMP[0], 2D 8: MOV TEMP[1], TEMP[1] 9: ELSE :2 10: MOV TEMP[2].xy, IN[0].xyyy 11: TEX TEMP[2], TEMP[2], SAMP[1], 2D 12: MOV TEMP[1], TEMP[2] 13: ENDIF 14: MOV OUT[0], TEMP[1] 15: END radeonsi: Compiling shader 17 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %27 = fmul float %25, %16 %28 = fadd float %27, %26 %29 = fmul float %28, 5.000000e-01 %30 = call float @llvm.floor.f32(float %29) %31 = fsub float %29, %30 %32 = fcmp olt float %31, 5.000000e-01 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %35 = select i1 %32, i64 0, i64 2 %36 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 %35 %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !invariant.load !0 %38 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %39 = select i1 %32, i64 3, i64 7 %40 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %38, i64 0, i64 %39 %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !invariant.load !0 %42 = bitcast float %33 to i32 %43 = bitcast float %34 to i32 %44 = insertelement <2 x i32> undef, i32 %42, i32 0 %45 = insertelement <2 x i32> %44, i32 %43, i32 1 %46 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %45, <8 x i32> %37, <4 x i32> %41, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = bitcast float %5 to i32 %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %51, 10 %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52, float %47, 11 %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %48, 12 %55 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54, float %49, 13 %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %55, float %50, 14 %57 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %57 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } !0 = !{} SHADER KEY prolog.color_two_side = 0 prolog.flatshade_colors = 0 prolog.poly_stipple = 0 prolog.force_persp_sample_interp = 0 prolog.force_linear_sample_interp = 0 prolog.force_persp_center_interp = 0 prolog.force_linear_center_interp = 0 prolog.bc_optimize_for_persp = 0 prolog.bc_optimize_for_linear = 0 epilog.spi_shader_col_format = 0x4 epilog.color_is_int8 = 0x0 epilog.last_cbuf = 0 epilog.alpha_func = 7 epilog.alpha_to_one = 0 epilog.poly_line_smoothing = 0 epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[8:9], exec ; BE88017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C00A0001 00000000 v_mov_b32_e32 v1, 0 ; 7E020280 v_mov_b32_e32 v6, s5 ; 7E0C0205 s_mov_b32 m0, s11 ; BEFC000B v_interp_p1_f32 v8, v2, 0, 0, [m0] ; D4200002 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s6, s[0:3], 0x20 ; C0220180 00000020 s_buffer_load_dword s0, s[0:3], 0x24 ; C0220000 00000024 v_interp_p2_f32 v8, [v8], v3, 0, 0, [m0] ; D4210003 v_interp_p1_f32 v9, v2, 1, 0, [m0] ; D4240102 v_interp_p2_f32 v9, [v9], v3, 1, 0, [m0] ; D4250103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s0 ; 7E000200 v_mac_f32_e32 v0, s6, v12 ; 2C001806 v_mul_f32_e32 v4, 0.5, v0 ; 0A0800F0 v_floor_f32_e32 v4, v4 ; 7E083F04 v_mad_f32 v0, v0, 0.5, -v4 ; D1C10000 8411E100 v_cmp_gt_f32_e64 s[0:1], 0.5, v0 ; D0440000 000200F0 v_cndmask_b32_e64 v0, 2, 0, s[0:1] ; D1000000 00010082 v_lshlrev_b64 v[4:5], 5, v[0:1] ; D28F0004 00020085 v_add_i32_e32 v10, vcc, s4, v4 ; 32140804 v_addc_u32_e32 v11, vcc, v5, v6, vcc ; 38160D05 v_add_i32_e32 v4, vcc, 16, v10 ; 32081490 v_cndmask_b32_e64 v0, 7, 3, s[0:1] ; D1000000 00010687 v_addc_u32_e32 v5, vcc, 0, v11, vcc ; 380A1680 v_lshlrev_b64 v[0:1], 4, v[0:1] ; D28F0000 00020084 v_add_i32_e32 v12, vcc, s4, v0 ; 32180004 v_addc_u32_e32 v13, vcc, v6, v1, vcc ; 381A0306 flat_load_dwordx4 v[4:7], v[4:5] ; DC5C0000 04000004 s_nop 0 ; BF800000 flat_load_dwordx4 v[15:18], v[10:11] ; DC5C0000 0F00000A s_nop 0 ; BF800000 flat_load_dwordx4 v[0:3], v[12:13] ; DC5C0000 0000000C s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_readfirstlane_b32 s4, v4 ; 7E080504 s_waitcnt vmcnt(1) ; BF8C0F71 v_readfirstlane_b32 s0, v15 ; 7E00050F v_readfirstlane_b32 s1, v16 ; 7E020510 v_readfirstlane_b32 s2, v17 ; 7E040511 v_readfirstlane_b32 s3, v18 ; 7E060512 v_readfirstlane_b32 s5, v5 ; 7E0A0505 v_readfirstlane_b32 s6, v6 ; 7E0C0506 v_readfirstlane_b32 s7, v7 ; 7E0E0507 s_waitcnt vmcnt(0) ; BF8C0F70 v_readfirstlane_b32 s12, v0 ; 7E180500 v_readfirstlane_b32 s13, v1 ; 7E1A0501 v_readfirstlane_b32 s14, v2 ; 7E1C0502 v_readfirstlane_b32 s15, v3 ; 7E1E0503 s_and_b64 exec, exec, s[8:9] ; 86FE087E s_nop 3 ; BF800003 image_sample v[0:3], v[8:9], s[0:7], s[12:15] dmask:0xf ; F0800F00 00600008 v_mov_b32_e32 v13, v14 ; 7E1A030E s_waitcnt vmcnt(0) ; BF8C0F70 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd277 SPI_PS_INPUT_ENA = 0x0202 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 300 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** working with commit 16be87c904293c2e53d50cc3519789a604a6a33b reverted: FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL SV[0], POSITION DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[2] DCL TEMP[0] DCL TEMP[1..2], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], SV[0] 1: MAD TEMP[0].y, SV[0], CONST[2].xxxx, CONST[2].yyyy 2: MUL TEMP[1].x, TEMP[0].yyyy, IMM[0].xxxx 3: FRC TEMP[1].x, TEMP[1].xxxx 4: FSLT TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 5: UIF TEMP[1].xxxx :2 6: MOV TEMP[1].xy, IN[0].xyyy 7: TEX TEMP[1], TEMP[1], SAMP[0], 2D 8: MOV TEMP[1], TEMP[1] 9: ELSE :2 10: MOV TEMP[2].xy, IN[0].xyyy 11: TEX TEMP[2], TEMP[2], SAMP[1], 2D 12: MOV TEMP[1], TEMP[2] 13: ENDIF 14: MOV OUT[0], TEMP[1] 15: END radeonsi: Compiling shader 17 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36) %27 = fmul float %25, %16 %28 = fadd float %27, %26 %29 = fmul float %28, 5.000000e-01 %30 = call float @llvm.floor.f32(float %29) %31 = fsub float %29, %30 %32 = fcmp olt float %31, 5.000000e-01 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %35 = bitcast float %33 to i32 %36 = bitcast float %34 to i32 %37 = insertelement <2 x i32> undef, i32 %35, i32 0 %38 = insertelement <2 x i32> %37, i32 %36, i32 1 br i1 %32, label %IF, label %ELSE IF: ; preds = %main_body %39 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %40 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %39, i64 0, i64 3, !amdgpu.uniform !0 %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !invariant.load !0 %42 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !invariant.load !0 %44 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %38, <8 x i32> %43, <4 x i32> %41, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) br label %ENDIF ELSE: ; preds = %main_body %45 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %46 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %45, i64 0, i64 7, !amdgpu.uniform !0 %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !invariant.load !0 %48 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2, !amdgpu.uniform !0 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !invariant.load !0 %50 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %38, <8 x i32> %49, <4 x i32> %47, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink6 = phi <4 x float> [ %50, %ELSE ], [ %44, %IF ] %51 = extractelement <4 x float> %.sink6, i32 0 %52 = extractelement <4 x float> %.sink6, i32 1 %53 = extractelement <4 x float> %.sink6, i32 2 %54 = extractelement <4 x float> %.sink6, i32 3 %55 = bitcast float %5 to i32 %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %55, 10 %57 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %51, 11 %58 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %57, float %52, 12 %59 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %58, float %53, 13 %60 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %59, float %54, 14 %61 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %60, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %61 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } !0 = !{} SHADER KEY prolog.color_two_side = 0 prolog.flatshade_colors = 0 prolog.poly_stipple = 0 prolog.force_persp_sample_interp = 0 prolog.force_linear_sample_interp = 0 prolog.force_persp_center_interp = 0 prolog.force_linear_center_interp = 0 prolog.bc_optimize_for_persp = 0 prolog.bc_optimize_for_linear = 0 epilog.spi_shader_col_format = 0x4 epilog.color_is_int8 = 0x0 epilog.last_cbuf = 0 epilog.alpha_func = 7 epilog.alpha_to_one = 0 epilog.poly_line_smoothing = 0 epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[0:1], exec ; BE80017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C00A0301 00000000 s_mov_b32 m0, s11 ; BEFC000B v_interp_p1_f32 v4, v2, 0, 0, [m0] ; D4100002 v_interp_p2_f32 v4, [v4], v3, 0, 0, [m0] ; D4110003 v_interp_p1_f32 v5, v2, 1, 0, [m0] ; D4140102 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s3, s[12:15], 0x24 ; C02200C6 00000024 s_buffer_load_dword s2, s[12:15], 0x20 ; C0220086 00000020 v_interp_p2_f32 v5, [v5], v3, 1, 0, [m0] ; D4150103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s3 ; 7E000203 v_mac_f32_e32 v0, s2, v12 ; 2C001802 v_mul_f32_e32 v1, 0.5, v0 ; 0A0200F0 v_floor_f32_e32 v1, v1 ; 7E023F01 v_mad_f32 v0, v0, 0.5, -v1 ; D1C10000 8405E100 v_cmp_ngt_f32_e32 vcc, 0.5, v0 ; 7C9600F0 s_and_saveexec_b64 s[2:3], vcc ; BE82206A s_xor_b64 s[2:3], exec, s[2:3] ; 8882027E s_load_dwordx8 s[12:19], s[4:5], 0x40 ; C00E0302 00000040 s_load_dwordx4 s[20:23], s[4:5], 0x70 ; C00A0502 00000070 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], v[4:5], s[12:19], s[20:23] dmask:0xf ; F0800F00 00A30004 s_waitcnt vmcnt(0) ; BF8C0F70 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822102 s_xor_b64 exec, exec, s[2:3] ; 88FE027E s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C00E0302 00000000 s_load_dwordx4 s[4:7], s[4:5], 0x30 ; C00A0102 00000030 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], v[4:5], s[12:19], s[4:7] dmask:0xf ; F0800F00 00230004 s_waitcnt vmcnt(0) ; BF8C0F70 s_or_b64 exec, exec, s[2:3] ; 87FE027E s_and_b64 exec, exec, s[0:1] ; 86FE007E v_mov_b32_e32 v13, v14 ; 7E1A030E Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd277 SPI_PS_INPUT_ENA = 0x0202 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 208 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ********************