FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[5], PERSPECTIVE DCL IN[4], COLOR[1], COLOR, CENTROID DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[0][0..2] DCL TEMP[0], LOCAL DCL TEMP[1..3] IMM[0] FLT32 { 0.5000, 0.0156, 0.0000, -0.5000} IMM[1] FLT32 { 1.0000, 0.0000, 340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000} 0: SLT TEMP[0].x, IMM[0].xxxx, IN[3].wwww 1: IF TEMP[0].xxxx :7 2: TEX TEMP[1], IN[3], SAMP[2], 2D 3: TEX TEMP[2], IN[3].zyzw, SAMP[2], 2D 4: MAX TEMP[3].x, TEMP[1].xxxx, TEMP[2].xxxx 5: ADD TEMP[1], -TEMP[3].xxxx, IMM[0].xxxx 6: KILL_IF TEMP[1] 7: ENDIF 8: MUL TEMP[1].xy, CONST[0][1], IN[2] 9: MUL TEMP[1].xy, TEMP[1], IMM[0].xxxx 10: RCP TEMP[0], IN[2].wwww 11: MIN TEMP[0], IMM[1].zzzz, TEMP[0] 12: MAX TEMP[1].z, IMM[1].wwww, TEMP[0] 13: MAD TEMP[1].xy, TEMP[1], TEMP[1].zzzz, IMM[0].xxxx 14: MUL TEMP[1].xy, TEMP[1], IMM[0].yyyy 15: MOV TEMP[1].zw, IMM[0].zzzz 16: TXL TEMP[1], TEMP[1], SAMP[1], 2D 17: ADD TEMP[1].yzw, -CONST[0][0].xxyz, IN[1].xxyz 18: DP3 TEMP[1].y, TEMP[1].yzww, TEMP[1].yzww 19: RSQ TEMP[0], |TEMP[1].yyyy| 20: MIN TEMP[1].y, IMM[1].zzzz, TEMP[0] 21: RCP TEMP[0], TEMP[1].yyyy 22: MIN TEMP[0], IMM[1].zzzz, TEMP[0] 23: MAX TEMP[1].y, IMM[1].wwww, TEMP[0] 24: ADD TEMP[1].y, TEMP[1].yyyy, -CONST[0][2].xxxx 25: MUL TEMP[1].z, TEMP[1].yyyy, CONST[0][2].yyyy 26: MAD TEMP[1].x, TEMP[1].yyyy, CONST[0][2].yyyy, TEMP[1].xxxx 27: MAD TEMP[1].y, TEMP[1].zzzz, IMM[0].xxxx, IMM[0].xxxx 28: ADD TEMP[1].x, -TEMP[1].yyyy, TEMP[1].xxxx 29: MOV TEMP[1], -TEMP[1].xxxx 30: KILL_IF TEMP[1] 31: MOV TEMP[1], IN[4].wwww 32: KILL_IF TEMP[1] 33: TEX TEMP[1], IN[0], SAMP[0], 2D 34: ADD TEMP[1], TEMP[1].wwww, IMM[0].wwww 35: KILL_IF TEMP[1] 36: MOV OUT[0], IMM[1].xxxx 37: END radeonsi: Compiling shader 43 TGSI shader LLVM IR: ; ModuleID = 'mesa-shader' source_filename = "mesa-shader" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #0 ; Function Attrs: nounwind declare void @llvm.amdgcn.kill(i1) #2 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.minnum.f32(float, float) #0 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.fabs.f32(float) #0 ; Function Attrs: nounwind readnone speculatable declare float @llvm.sqrt.f32(float) #0 ; Function Attrs: nounwind readnone speculatable declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #2 define amdgpu_ps void @wrapper([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #4 { main_body: %22 = ptrtoint [0 x float] addrspace(2)* %2 to i64 %23 = bitcast i64 %22 to <2 x i32> %24 = extractelement <2 x i32> %23, i32 0 %25 = extractelement <2 x i32> %23, i32 1 %26 = bitcast <2 x i32> %7 to <2 x float> %27 = extractelement <2 x float> %26, i32 0 %28 = extractelement <2 x float> %26, i32 1 %29 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 3, i32 4, i32 %5) #3 %30 = call nsz float @llvm.amdgcn.interp.p2(float %29, float %28, i32 3, i32 4, i32 %5) #3 %31 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 0, i32 0, i32 %5) #3 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %28, i32 0, i32 0, i32 %5) #3 %33 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 1, i32 0, i32 %5) #3 %34 = call nsz float @llvm.amdgcn.interp.p2(float %33, float %28, i32 1, i32 0, i32 %5) #3 %35 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 0, i32 1, i32 %5) #3 %36 = call nsz float @llvm.amdgcn.interp.p2(float %35, float %28, i32 0, i32 1, i32 %5) #3 %37 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 1, i32 1, i32 %5) #3 %38 = call nsz float @llvm.amdgcn.interp.p2(float %37, float %28, i32 1, i32 1, i32 %5) #3 %39 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 2, i32 1, i32 %5) #3 %40 = call nsz float @llvm.amdgcn.interp.p2(float %39, float %28, i32 2, i32 1, i32 %5) #3 %41 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 0, i32 2, i32 %5) #3 %42 = call nsz float @llvm.amdgcn.interp.p2(float %41, float %28, i32 0, i32 2, i32 %5) #3 %43 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 1, i32 2, i32 %5) #3 %44 = call nsz float @llvm.amdgcn.interp.p2(float %43, float %28, i32 1, i32 2, i32 %5) #3 %45 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 3, i32 2, i32 %5) #3 %46 = call nsz float @llvm.amdgcn.interp.p2(float %45, float %28, i32 3, i32 2, i32 %5) #3 %47 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 3, i32 3, i32 %5) #3 %48 = call nsz float @llvm.amdgcn.interp.p2(float %47, float %28, i32 3, i32 3, i32 %5) #3 %49 = fcmp nsz ogt float %48, 5.000000e-01 br i1 %49, label %if1.i, label %main.exit if1.i: ; preds = %main_body %50 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 2, i32 3, i32 %5) #3 %51 = call nsz float @llvm.amdgcn.interp.p2(float %50, float %28, i32 2, i32 3, i32 %5) #3 %52 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 1, i32 3, i32 %5) #3 %53 = call nsz float @llvm.amdgcn.interp.p2(float %52, float %28, i32 1, i32 3, i32 %5) #3 %54 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 0, i32 3, i32 %5) #3 %55 = call nsz float @llvm.amdgcn.interp.p2(float %54, float %28, i32 0, i32 3, i32 %5) #3 %56 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(2)* %3, i64 0, i64 20, !amdgpu.uniform !0 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !invariant.load !0, !alias.scope !1, !noalias !4 %58 = bitcast [0 x <8 x i32>] addrspace(2)* %3 to [0 x <4 x i32>] addrspace(2)* %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 43, !amdgpu.uniform !0 %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !invariant.load !0, !alias.scope !1, !noalias !4 %61 = bitcast float %55 to i32 %62 = bitcast float %53 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = bitcast <2 x i32> %64 to <2 x float> %66 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %65, <8 x i32> %57, <4 x i32> %60, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %67 = bitcast float %51 to i32 %68 = insertelement <2 x i32> undef, i32 %67, i32 0 %69 = insertelement <2 x i32> %68, i32 %62, i32 1 %70 = bitcast <2 x i32> %69 to <2 x float> %71 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %70, <8 x i32> %57, <4 x i32> %60, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %72 = call nsz float @llvm.maxnum.f32(float %66, float %71) #3 %73 = fsub nsz float 5.000000e-01, %72 %74 = fcmp nsz ogt float %73, 0.000000e+00 call void @llvm.amdgcn.kill(i1 %74) #2, !noalias !1 br label %main.exit main.exit: ; preds = %main_body, %if1.i %75 = and i32 %25, 65535 %76 = insertelement <4 x i32> , i32 %24, i32 0 %77 = insertelement <4 x i32> %76, i32 %75, i32 1 %78 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %77, i32 16) %79 = fmul nsz float %78, %42 %80 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %77, i32 20) %81 = fmul nsz float %80, %44 %82 = fmul nsz float %79, 5.000000e-01 %83 = fmul nsz float %81, 5.000000e-01 %84 = fdiv nsz float 1.000000e+00, %46, !fpmath !6 %85 = call nsz float @llvm.minnum.f32(float %84, float 0x47EFFFFFE0000000) #3 %86 = call nsz float @llvm.maxnum.f32(float %85, float 0xC7EFFFFFE0000000) #3 %87 = fmul nsz float %82, %86 %88 = fadd nsz float %87, 5.000000e-01 %89 = fmul nsz float %83, %86 %90 = fadd nsz float %89, 5.000000e-01 %91 = fmul nsz float %88, 1.562500e-02 %92 = fmul nsz float %90, 1.562500e-02 %93 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(2)* %3, i64 0, i64 18, !amdgpu.uniform !0 %94 = load <8 x i32>, <8 x i32> addrspace(2)* %93, align 32, !invariant.load !0, !alias.scope !1, !noalias !4 %95 = bitcast [0 x <8 x i32>] addrspace(2)* %3 to [0 x <4 x i32>] addrspace(2)* %96 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %95, i64 0, i64 39, !amdgpu.uniform !0 %97 = load <4 x i32>, <4 x i32> addrspace(2)* %96, align 16, !invariant.load !0, !alias.scope !1, !noalias !4 %98 = bitcast float %91 to i32 %99 = bitcast float %92 to i32 %100 = insertelement <4 x i32> , i32 %98, i32 0 %101 = insertelement <4 x i32> %100, i32 %99, i32 1 %102 = bitcast <4 x i32> %101 to <4 x float> %103 = call float @llvm.amdgcn.image.sample.l.f32.v4f32.v8i32(<4 x float> %102, <8 x i32> %94, <4 x i32> %97, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %104 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %77, i32 0) %105 = fsub nsz float %36, %104 %106 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %77, i32 4) %107 = fsub nsz float %38, %106 %108 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %77, i32 8) %109 = fsub nsz float %40, %108 %110 = fmul nsz float %105, %105 %111 = fmul nsz float %107, %107 %112 = fadd nsz float %111, %110 %113 = fmul nsz float %109, %109 %114 = fadd nsz float %112, %113 %115 = call nsz float @llvm.fabs.f32(float %114) #2 %116 = call nsz float @llvm.sqrt.f32(float %115) #3 %117 = fdiv nsz float 1.000000e+00, %116, !fpmath !6 %118 = call nsz float @llvm.minnum.f32(float %117, float 0x47EFFFFFE0000000) #3 %119 = fdiv nsz float 1.000000e+00, %118, !fpmath !6 %120 = call nsz float @llvm.minnum.f32(float %119, float 0x47EFFFFFE0000000) #3 %121 = call nsz float @llvm.maxnum.f32(float %120, float 0xC7EFFFFFE0000000) #3 %122 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %77, i32 32) %123 = fsub nsz float %121, %122 %124 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %77, i32 36) %125 = fmul nsz float %123, %124 %126 = fadd nsz float %125, %103 %127 = fmul nsz float %125, 5.000000e-01 %128 = fadd nsz float %127, 5.000000e-01 %129 = fsub nsz float %126, %128 %130 = fcmp olt float %129, -0.000000e+00 call void @llvm.amdgcn.kill(i1 %130) #2, !noalias !1 %131 = fcmp nsz ogt float %30, 0.000000e+00 call void @llvm.amdgcn.kill(i1 %131) #2, !noalias !1 %132 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(2)* %3, i64 0, i64 16, !amdgpu.uniform !0 %133 = load <8 x i32>, <8 x i32> addrspace(2)* %132, align 32, !invariant.load !0, !alias.scope !1, !noalias !4 %134 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %95, i64 0, i64 35, !amdgpu.uniform !0 %135 = load <4 x i32>, <4 x i32> addrspace(2)* %134, align 16, !invariant.load !0, !alias.scope !1, !noalias !4 %136 = bitcast float %32 to i32 %137 = bitcast float %34 to i32 %138 = insertelement <2 x i32> undef, i32 %136, i32 0 %139 = insertelement <2 x i32> %138, i32 %137, i32 1 %140 = bitcast <2 x i32> %139 to <2 x float> %141 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %140, <8 x i32> %133, <4 x i32> %135, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #3 %142 = extractelement <4 x float> %141, i32 3 %143 = fadd nsz float %142, -5.000000e-01 %144 = fcmp nsz ogt float %143, 0.000000e+00 call void @llvm.amdgcn.kill(i1 %144) #2, !noalias !1 %145 = fcmp nsz olt float %4, 1.000000e+00 call void @llvm.amdgcn.kill(i1 %145) #2 call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 15, <2 x i16> , <2 x i16> , i1 true, i1 true) #2 ret void } ; Function Attrs: argmemonly nounwind declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #5 ; Function Attrs: argmemonly nounwind declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #5 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.l.f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind readonly } attributes #2 = { nounwind } attributes #3 = { nounwind readnone } attributes #4 = { "no-signed-zeros-fp-math"="true" } attributes #5 = { argmemonly nounwind } !0 = !{} !1 = !{!2} !2 = distinct !{!2, !3, !"main: argument 1"} !3 = distinct !{!3, !"main"} !4 = !{!5} !5 = distinct !{!5, !3, !"main: argument 0"} !6 = !{float 2.500000e+00} SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 1 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 4 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: wrapper: BB42_0: s_mov_b64 s[10:11], exec ; BE8A017E s_wqm_b64 exec, exec ; BEFE077E s_mov_b32 m0, s9 ; BEFC0009 v_interp_p1_f32 v2, v0, attr3.w ; D4080F00 v_interp_p2_f32 v2, v1, attr3.w ; D4090F01 s_mov_b64 s[0:1], s[4:5] ; BE800104 v_cmp_lt_f32_e32 vcc, 0.5, v2 ; 7C8204F0 s_and_saveexec_b64 s[2:3], vcc ; BE82206A s_xor_b64 s[2:3], exec, s[2:3] ; 8882027E s_cbranch_execz BB42_2 ; BF880000 BB42_1: s_load_dwordx8 s[12:19], s[6:7], 0x280 ; C00E0303 00000280 s_load_dwordx4 s[20:23], s[6:7], 0x2b0 ; C00A0503 000002B0 s_mov_b32 m0, s9 ; BEFC0009 v_interp_p1_f32 v4, v0, attr3.y ; D4100D00 v_interp_p1_f32 v3, v0, attr3.x ; D40C0C00 v_interp_p1_f32 v2, v0, attr3.z ; D4080E00 v_interp_p2_f32 v4, v1, attr3.y ; D4110D01 v_interp_p2_f32 v3, v1, attr3.x ; D40D0C01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v5, v[3:4], s[12:19], s[20:23] dmask:0x1 ; F0800100 00A30503 v_interp_p2_f32 v2, v1, attr3.z ; D4090E01 v_mov_b32_e32 v3, v4 ; 7E060304 image_sample v2, v[2:3], s[12:19], s[20:23] dmask:0x1 ; F0800100 00A30202 s_waitcnt vmcnt(0) ; BF8C0F70 v_max_f32_e32 v2, v5, v2 ; 16040505 v_sub_f32_e32 v2, 0.5, v2 ; 040404F0 v_cmpx_lt_f32_e32 vcc, 0, v2 ; 7CA20480 BB42_2: s_or_b64 exec, exec, s[2:3] ; 87FE027E s_mov_b32 m0, s9 ; BEFC0009 s_and_b32 s1, s1, 0xffff ; 8601FF01 0000FFFF s_mov_b32 s3, 0x27fac ; BE8300FF 00027FAC s_mov_b32 s2, 48 ; BE8200B0 v_interp_p1_f32 v4, v0, attr4.w ; D4101300 v_interp_p1_f32 v2, v0, attr0.x ; D4080000 v_interp_p1_f32 v3, v0, attr0.y ; D40C0100 v_interp_p1_f32 v8, v0, attr1.x ; D4200400 v_interp_p1_f32 v9, v0, attr1.y ; D4240500 v_interp_p1_f32 v10, v0, attr1.z ; D4280600 v_interp_p1_f32 v5, v0, attr2.x ; D4140800 v_interp_p1_f32 v6, v0, attr2.y ; D4180900 v_interp_p1_f32 v0, v0, attr2.w ; D4000B00 s_buffer_load_dwordx2 s[4:5], s[0:3], 0x0 ; C0260100 00000000 s_buffer_load_dword s9, s[0:3], 0x8 ; C0220240 00000008 s_buffer_load_dwordx2 s[12:13], s[0:3], 0x10 ; C0260300 00000010 v_interp_p2_f32 v0, v1, attr2.w ; D4010B01 v_rcp_f32_e32 v0, v0 ; 7E004500 v_interp_p2_f32 v5, v1, attr2.x ; D4150801 v_interp_p2_f32 v6, v1, attr2.y ; D4190901 v_mov_b32_e32 v11, 0x7f7fffff ; 7E1602FF 7F7FFFFF s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v5, s12, v5 div:2 ; D1050005 18020A0C v_mul_f32_e64 v6, s13, v6 div:2 ; D1050006 18020C0D s_load_dwordx8 s[12:19], s[6:7], 0x240 ; C00E0303 00000240 s_load_dwordx4 s[20:23], s[6:7], 0x270 ; C00A0503 00000270 v_min_f32_e32 v0, v0, v11 ; 14001700 v_mov_b32_e32 v12, 0xff7fffff ; 7E1802FF FF7FFFFF v_max_f32_e32 v0, v0, v12 ; 16001900 v_mad_f32 v5, v5, v0, 0.5 ; D1C10005 03C20105 v_mad_f32 v0, v6, v0, 0.5 ; D1C10000 03C20106 v_mov_b32_e32 v6, 0x3c800000 ; 7E0C02FF 3C800000 v_interp_p2_f32 v8, v1, attr1.x ; D4210401 v_interp_p2_f32 v9, v1, attr1.y ; D4250501 v_mul_f32_e32 v5, v5, v6 ; 0A0A0D05 v_mul_f32_e32 v6, v0, v6 ; 0A0C0D00 v_subrev_f32_e32 v0, s4, v8 ; 06001004 v_mov_b32_e32 v7, 0 ; 7E0E0280 v_subrev_f32_e32 v8, s5, v9 ; 06101205 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v5, v[5:8], s[12:19], s[20:23] dmask:0x1 ; F0900100 00A30505 v_mul_f32_e32 v0, v0, v0 ; 0A000100 v_interp_p2_f32 v10, v1, attr1.z ; D4290601 v_subrev_f32_e32 v9, s9, v10 ; 06121409 v_mac_f32_e32 v0, v8, v8 ; 2C001108 v_mac_f32_e32 v0, v9, v9 ; 2C001309 v_rsq_f32_e64 v0, |v0| ; D1640100 00000100 s_buffer_load_dwordx2 s[0:1], s[0:3], 0x20 ; C0260000 00000020 v_interp_p2_f32 v4, v1, attr4.w ; D4111301 v_interp_p2_f32 v2, v1, attr0.x ; D4090001 v_min_f32_e32 v0, v0, v11 ; 14001700 v_rcp_f32_e32 v0, v0 ; 7E004500 v_interp_p2_f32 v3, v1, attr0.y ; D40D0101 v_min_f32_e32 v0, v0, v11 ; 14001700 v_max_f32_e32 v0, v0, v12 ; 16001900 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v6, s1, v0 ; 0A0C0001 v_mad_f32 v6, v6, 0.5, 0.5 ; D1C10006 03C1E106 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, s1, v0 ; 2C0A0001 v_sub_f32_e32 v0, v5, v6 ; 04000D05 v_bfrev_b32_e32 v5, 1 ; 7E0A5881 v_cmp_lt_f32_e32 vcc, v0, v5 ; 7C820B00 s_and_b64 exec, exec, vcc ; 86FE6A7E s_cbranch_execnz BB42_4 ; BF890000 exp null off, off, off, off done vm ; C4001890 00000000 s_endpgm ; BF810000 BB42_4: v_cmpx_lt_f32_e32 vcc, 0, v4 ; 7CA20880 s_load_dwordx8 s[12:19], s[6:7], 0x200 ; C00E0303 00000200 s_load_dwordx4 s[0:3], s[6:7], 0x230 ; C00A0003 00000230 s_and_b64 exec, exec, s[10:11] ; 86FE0A7E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, v[2:3], s[12:19], s[0:3] dmask:0x8 ; F0800800 00030002 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v0, -0.5, v0 ; 020000F1 v_cmpx_lt_f32_e32 vcc, 0, v0 ; 7CA20080 v_cmpx_gt_f32_e64 vcc, 1.0, s8 ; D054006A 000010F2 v_mov_b32_e32 v0, 0x3c003c00 ; 7E0002FF 3C003C00 exp mrt0 v0, v0, v0, v0 done compr vm ; C4001C0F 00000000 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0x0002 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 572 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 8 ********************