FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 1 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32, float, float, float, float) #0 { main_body: %25 = bitcast float %3 to i32 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %25, 6 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %26, float %21, 7 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %27, float %22, 8 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 9 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 10 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31 } attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV OUT[1], IN[1] 5: END radeonsi: Compiling shader 2 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs void @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) #0 { main_body: %14 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 0, !amdgpu.uniform !0 %15 = load <4 x i32>, <4 x i32> addrspace(2)* %14, align 16, !invariant.load !0 %16 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %15, i32 %12, i32 0, i1 false, i1 false) #2 %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 1, !amdgpu.uniform !0 %22 = load <4 x i32>, <4 x i32> addrspace(2)* %21, align 16, !invariant.load !0 %23 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %22, i32 %13, i32 0, i1 false, i1 false) #2 %24 = extractelement <4 x float> %23, i32 0 %25 = extractelement <4 x float> %23, i32 1 %26 = extractelement <4 x float> %23, i32 2 %27 = extractelement <4 x float> %23, i32 3 %28 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !invariant.load !0 %30 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 0) %31 = fmul nsz float %17, %30 %32 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 4) %33 = fmul nsz float %17, %32 %34 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 8) %35 = fmul nsz float %17, %34 %36 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 12) %37 = fmul nsz float %17, %36 %38 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 16) %39 = fmul nsz float %18, %38 %40 = fadd nsz float %39, %31 %41 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 20) %42 = fmul nsz float %18, %41 %43 = fadd nsz float %42, %33 %44 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 24) %45 = fmul nsz float %18, %44 %46 = fadd nsz float %45, %35 %47 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 28) %48 = fmul nsz float %18, %47 %49 = fadd nsz float %48, %37 %50 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 32) %51 = fmul nsz float %19, %50 %52 = fadd nsz float %51, %40 %53 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 36) %54 = fmul nsz float %19, %53 %55 = fadd nsz float %54, %43 %56 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 40) %57 = fmul nsz float %19, %56 %58 = fadd nsz float %57, %46 %59 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 44) %60 = fmul nsz float %19, %59 %61 = fadd nsz float %60, %49 %62 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 48) %63 = fmul nsz float %20, %62 %64 = fadd nsz float %63, %52 %65 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 52) %66 = fmul nsz float %20, %65 %67 = fadd nsz float %66, %55 %68 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 56) %69 = fmul nsz float %20, %68 %70 = fadd nsz float %69, %58 %71 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 60) %72 = fmul nsz float %20, %71 %73 = fadd nsz float %72, %61 %74 = and i32 %7, 1 %75 = icmp eq i32 %74, 0 br i1 %75, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %76 = call nsz float @llvm.maxnum.f32(float %24, float 0.000000e+00) #2 %77 = call nsz float @llvm.minnum.f32(float %76, float 1.000000e+00) #2 %78 = call nsz float @llvm.maxnum.f32(float %25, float 0.000000e+00) #2 %79 = call nsz float @llvm.minnum.f32(float %78, float 1.000000e+00) #2 %80 = call nsz float @llvm.maxnum.f32(float %26, float 0.000000e+00) #2 %81 = call nsz float @llvm.minnum.f32(float %80, float 1.000000e+00) #2 %82 = call nsz float @llvm.maxnum.f32(float %27, float 0.000000e+00) #2 %83 = call nsz float @llvm.minnum.f32(float %82, float 1.000000e+00) #2 br label %endif-block endif-block: ; preds = %main_body, %if-true-block %OUT1.w.0 = phi float [ %83, %if-true-block ], [ %27, %main_body ] %OUT1.z.0 = phi float [ %81, %if-true-block ], [ %26, %main_body ] %OUT1.y.0 = phi float [ %79, %if-true-block ], [ %25, %main_body ] %OUT1.x.0 = phi float [ %77, %if-true-block ], [ %24, %main_body ] call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %OUT1.x.0, float %OUT1.y.0, float %OUT1.z.0, float %OUT1.w.0, i1 false, i1 false) #4 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %64, float %67, float %70, float %73, i1 true, i1 false) #4 ret void } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.minnum.f32(float, float) #3 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #4 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind readnone speculatable } attributes #4 = { nounwind } !0 = !{} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL TEMP[0] IMM[0] FLT32 { 0.5000, 0.2500, -0.2500, 0.0000} 0: MOV OUT[0], IN[0] 1: MOV OUT[2], IN[1] 2: MOV OUT[1], IN[2] 3: MUL TEMP[0].x, IN[1].wwww, IMM[0].xxxx 4: MUL TEMP[0].y, IN[1].wwww, IMM[0].yyyy 5: MOV OUT[3].x, IN[1] 6: MAD OUT[3].y, IN[1].yyyy, TEMP[0].xxxx, IMM[0].yyyy 7: MAD OUT[3].z, IN[1].yyyy, TEMP[0].yyyy, IMM[0].yyyy 8: RCP OUT[3].w, TEMP[0].xxxx 9: MOV OUT[4].x, IN[1] 10: MAD OUT[4].y, IN[1].yyyy, TEMP[0].xxxx, IMM[0].zzzz 11: MAD OUT[4].z, IN[1].yyyy, TEMP[0].yyyy, IMM[0].zzzz 12: RCP OUT[4].w, TEMP[0].yyyy 13: END radeonsi: Compiling shader 3 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs void @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 0, !amdgpu.uniform !0 %16 = load <4 x i32>, <4 x i32> addrspace(2)* %15, align 16, !invariant.load !0 %17 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %16, i32 %12, i32 0, i1 false, i1 false) #4 %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 1, !amdgpu.uniform !0 %23 = load <4 x i32>, <4 x i32> addrspace(2)* %22, align 16, !invariant.load !0 %24 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %23, i32 %13, i32 0, i1 false, i1 false) #4 %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 2, !amdgpu.uniform !0 %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !invariant.load !0 %31 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %30, i32 %14, i32 0, i1 false, i1 false) #4 %32 = extractelement <4 x float> %31, i32 0 %33 = extractelement <4 x float> %31, i32 1 %34 = extractelement <4 x float> %31, i32 2 %35 = extractelement <4 x float> %31, i32 3 %36 = fmul nsz float %28, 5.000000e-01 %37 = fmul nsz float %28, 2.500000e-01 %38 = fmul nsz float %26, %36 %39 = fadd nsz float %38, 2.500000e-01 %40 = fmul nsz float %26, %37 %41 = fadd nsz float %40, 2.500000e-01 %42 = fdiv nsz float 1.000000e+00, %36, !fpmath !1 %43 = fadd nsz float %38, -2.500000e-01 %44 = fadd nsz float %40, -2.500000e-01 %45 = fdiv nsz float 1.000000e+00, %37, !fpmath !1 %46 = and i32 %7, 1 %47 = icmp eq i32 %46, 0 br i1 %47, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %48 = call nsz float @llvm.maxnum.f32(float %32, float 0.000000e+00) #4 %49 = call nsz float @llvm.minnum.f32(float %48, float 1.000000e+00) #4 %50 = call nsz float @llvm.maxnum.f32(float %33, float 0.000000e+00) #4 %51 = call nsz float @llvm.minnum.f32(float %50, float 1.000000e+00) #4 %52 = call nsz float @llvm.maxnum.f32(float %34, float 0.000000e+00) #4 %53 = call nsz float @llvm.minnum.f32(float %52, float 1.000000e+00) #4 %54 = call nsz float @llvm.maxnum.f32(float %35, float 0.000000e+00) #4 %55 = call nsz float @llvm.minnum.f32(float %54, float 1.000000e+00) #4 br label %endif-block endif-block: ; preds = %main_body, %if-true-block %OUT1.w.0 = phi float [ %55, %if-true-block ], [ %35, %main_body ] %OUT1.z.0 = phi float [ %53, %if-true-block ], [ %34, %main_body ] %OUT1.y.0 = phi float [ %51, %if-true-block ], [ %33, %main_body ] %OUT1.x.0 = phi float [ %49, %if-true-block ], [ %32, %main_body ] call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %OUT1.x.0, float %OUT1.y.0, float %OUT1.z.0, float %OUT1.w.0, i1 false, i1 false) #3 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %25, float %26, float %27, float %28, i1 false, i1 false) #3 call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %25, float %39, float %41, float %42, i1 false, i1 false) #3 call void @llvm.amdgcn.exp.f32(i32 35, i32 15, float %25, float %43, float %44, float %45, i1 false, i1 false) #3 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %18, float %19, float %20, float %21, i1 true, i1 false) #3 ret void } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #3 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone speculatable } attributes #3 = { nounwind } attributes #4 = { nounwind readnone } !0 = !{} !1 = !{float 2.500000e+00} FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D_ARRAY, FLOAT DCL SVIEW[1], 2D_ARRAY, FLOAT DCL SVIEW[2], 2D_ARRAY, FLOAT DCL CONST[0..3] DCL TEMP[0..2] IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0].x, IN[0], SAMP[0], 2D_ARRAY 1: TEX TEMP[0].y, IN[0], SAMP[1], 2D_ARRAY 2: TEX TEMP[0].z, IN[0], SAMP[2], 2D_ARRAY 3: MOV TEMP[0].w, IMM[0].xxxx 4: DP4 OUT[0].x, CONST[0], TEMP[0] 5: DP4 OUT[0].y, CONST[1], TEMP[0] 6: DP4 OUT[0].z, CONST[2], TEMP[0] 7: MOV TEMP[1].w, TEMP[0].zzzz 8: SLE TEMP[2].w, TEMP[1], CONST[3].xxxx 9: SGT TEMP[1].w, TEMP[1], CONST[3].yyyy 10: MAX OUT[0].w, TEMP[1], TEMP[2] 11: END radeonsi: Compiling shader 4 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %22 = load <8 x i32>, <8 x i32> addrspace(2)* %21, align 32, !invariant.load !0 %23 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %24 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %23, i64 0, i64 35, !amdgpu.uniform !0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !invariant.load !0 %26 = bitcast <2 x i32> %10 to <2 x float> %27 = extractelement <2 x float> %26, i32 0 %28 = extractelement <2 x float> %26, i32 1 %29 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 0, i32 0, i32 %4) #3 %30 = call nsz float @llvm.amdgcn.interp.p2(float %29, float %28, i32 0, i32 0, i32 %4) #3 %31 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 1, i32 0, i32 %4) #3 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %28, i32 1, i32 0, i32 %4) #3 %33 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 2, i32 0, i32 %4) #3 %34 = call nsz float @llvm.amdgcn.interp.p2(float %33, float %28, i32 2, i32 0, i32 %4) #3 %35 = bitcast float %30 to i32 %36 = bitcast float %32 to i32 %37 = bitcast float %34 to i32 %38 = insertelement <4 x i32> undef, i32 %35, i32 0 %39 = insertelement <4 x i32> %38, i32 %36, i32 1 %40 = insertelement <4 x i32> %39, i32 %37, i32 2 %41 = bitcast <4 x i32> %40 to <4 x float> %42 = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %41, <8 x i32> %22, <4 x i32> %25, i32 1, i1 false, i1 false, i1 false, i1 false, i1 true) %43 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !invariant.load !0 %45 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %23, i64 0, i64 39, !amdgpu.uniform !0 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %41, <8 x i32> %44, <4 x i32> %46, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) #3 %48 = extractelement <4 x float> %47, i32 1 %49 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 20, !amdgpu.uniform !0 %50 = load <8 x i32>, <8 x i32> addrspace(2)* %49, align 32, !invariant.load !0 %51 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %23, i64 0, i64 43, !amdgpu.uniform !0 %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !invariant.load !0 %53 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %41, <8 x i32> %50, <4 x i32> %52, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) #3 %54 = extractelement <4 x float> %53, i32 2 %55 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !invariant.load !0 %57 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 0) %58 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 4) %59 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 8) %60 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 12) %61 = fmul nsz float %57, %42 %62 = fmul nsz float %58, %48 %63 = fadd nsz float %61, %62 %64 = fmul nsz float %59, %54 %65 = fadd nsz float %63, %64 %66 = fadd nsz float %65, %60 %67 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 16) %68 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 20) %69 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 24) %70 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 28) %71 = fmul nsz float %67, %42 %72 = fmul nsz float %68, %48 %73 = fadd nsz float %71, %72 %74 = fmul nsz float %69, %54 %75 = fadd nsz float %73, %74 %76 = fadd nsz float %75, %70 %77 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 32) %78 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 36) %79 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 40) %80 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 44) %81 = fmul nsz float %77, %42 %82 = fmul nsz float %78, %48 %83 = fadd nsz float %81, %82 %84 = fmul nsz float %79, %54 %85 = fadd nsz float %83, %84 %86 = fadd nsz float %85, %80 %87 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 48) %88 = fcmp nsz ole float %54, %87 %89 = select i1 %88, float 1.000000e+00, float 0.000000e+00 %90 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 52) %91 = fcmp nsz ogt float %54, %90 %92 = select i1 %91, float 1.000000e+00, float 0.000000e+00 %93 = call nsz float @llvm.maxnum.f32(float %92, float %89) #3 %94 = bitcast float %3 to i32 %95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %94, 6 %96 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95, float %66, 7 %97 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %96, float %76, 8 %98 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %97, float %86, 9 %99 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %98, float %93, 10 %100 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %99, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %100 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } !0 = !{} FRAG DCL IN[0], GENERIC[1], LINEAR DCL IN[1], GENERIC[2], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D_ARRAY, FLOAT DCL SVIEW[1], 2D_ARRAY, FLOAT DCL SVIEW[2], 2D_ARRAY, FLOAT DCL CONST[0..3] DCL TEMP[0..4] IMM[0] FLT32 { -0.5000, 0.0000, 0.5000, 1.0000} IMM[1] FLT32 { 2.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[1].x, IN[0] 1: ADD TEMP[1].yz, IN[0], IMM[0].xxxx 2: ROUND TEMP[1].yz, TEMP[1] 3: MOV TEMP[1].w, IMM[0].yyyy 4: ADD TEMP[1].yz, TEMP[1], IMM[0].zzzz 5: MUL TEMP[1].y, TEMP[1], IN[0].wwww 6: MUL TEMP[1].z, TEMP[1], IN[1].wwww 7: MOV TEMP[3].x, IN[1] 8: ADD TEMP[3].yz, IN[1], IMM[0].xxxx 9: ROUND TEMP[3].yz, TEMP[3] 10: MOV TEMP[3].w, IMM[0].wwww 11: ADD TEMP[3].yz, TEMP[3], IMM[0].zzzz 12: MUL TEMP[3].y, TEMP[3], IN[0].wwww 13: MUL TEMP[3].z, TEMP[3], IN[1].wwww 14: TEX TEMP[2].x, TEMP[1].xyww, SAMP[0], 2D_ARRAY 15: TEX TEMP[2].y, TEMP[1].xzww, SAMP[1], 2D_ARRAY 16: TEX TEMP[2].z, TEMP[1].xzww, SAMP[2], 2D_ARRAY 17: TEX TEMP[4].x, TEMP[3].xyww, SAMP[0], 2D_ARRAY 18: TEX TEMP[4].y, TEMP[3].xzww, SAMP[1], 2D_ARRAY 19: TEX TEMP[4].z, TEMP[3].xzww, SAMP[2], 2D_ARRAY 20: ROUND TEMP[1].yz, IN[0] 21: ADD TEMP[1].yz, TEMP[1], -IN[0] 22: MUL TEMP[1].yz, |TEMP[1]|, IMM[1].xxxx 23: LRP TEMP[0], TEMP[1].yzzz, TEMP[2], TEMP[4] 24: MOV TEMP[0].w, IMM[0].wwww 25: DP4 OUT[0].x, CONST[0], TEMP[0] 26: DP4 OUT[0].y, CONST[1], TEMP[0] 27: DP4 OUT[0].z, CONST[2], TEMP[0] 28: MOV TEMP[1].w, TEMP[0].zzzz 29: SLE TEMP[2].w, TEMP[1], CONST[3].xxxx 30: SGT TEMP[1].w, TEMP[1], CONST[3].yyyy 31: MAX OUT[0].w, TEMP[1], TEMP[2] 32: END radeonsi: Compiling shader 5 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %10 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #3 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #3 %26 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 0, i32 %4) #3 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %23, i32 1, i32 0, i32 %4) #3 %28 = fadd nsz float %27, -5.000000e-01 %29 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 2, i32 0, i32 %4) #3 %30 = call nsz float @llvm.amdgcn.interp.p2(float %29, float %23, i32 2, i32 0, i32 %4) #3 %31 = fadd nsz float %30, -5.000000e-01 %32 = call nsz float @llvm.rint.f32(float %28) #3 %33 = call nsz float @llvm.rint.f32(float %31) #3 %34 = fadd nsz float %32, 5.000000e-01 %35 = fadd nsz float %33, 5.000000e-01 %36 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 3, i32 0, i32 %4) #3 %37 = call nsz float @llvm.amdgcn.interp.p2(float %36, float %23, i32 3, i32 0, i32 %4) #3 %38 = fmul nsz float %34, %37 %39 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 3, i32 1, i32 %4) #3 %40 = call nsz float @llvm.amdgcn.interp.p2(float %39, float %23, i32 3, i32 1, i32 %4) #3 %41 = fmul nsz float %35, %40 %42 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 1, i32 %4) #3 %43 = call nsz float @llvm.amdgcn.interp.p2(float %42, float %23, i32 0, i32 1, i32 %4) #3 %44 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 1, i32 %4) #3 %45 = call nsz float @llvm.amdgcn.interp.p2(float %44, float %23, i32 1, i32 1, i32 %4) #3 %46 = fadd nsz float %45, -5.000000e-01 %47 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 2, i32 1, i32 %4) #3 %48 = call nsz float @llvm.amdgcn.interp.p2(float %47, float %23, i32 2, i32 1, i32 %4) #3 %49 = fadd nsz float %48, -5.000000e-01 %50 = call nsz float @llvm.rint.f32(float %46) #3 %51 = call nsz float @llvm.rint.f32(float %49) #3 %52 = fadd nsz float %50, 5.000000e-01 %53 = fadd nsz float %51, 5.000000e-01 %54 = fmul nsz float %52, %37 %55 = fmul nsz float %53, %40 %56 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !invariant.load !0 %58 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 35, !amdgpu.uniform !0 %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !invariant.load !0 %61 = bitcast float %25 to i32 %62 = bitcast float %38 to i32 %63 = insertelement <4 x i32> undef, i32 %61, i32 0 %64 = insertelement <4 x i32> %63, i32 0, i32 2 %65 = insertelement <4 x i32> %64, i32 %62, i32 1 %66 = bitcast <4 x i32> %65 to <4 x float> %67 = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %66, <8 x i32> %57, <4 x i32> %60, i32 1, i1 false, i1 false, i1 false, i1 false, i1 true) %68 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %69 = load <8 x i32>, <8 x i32> addrspace(2)* %68, align 32, !invariant.load !0 %70 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 39, !amdgpu.uniform !0 %71 = load <4 x i32>, <4 x i32> addrspace(2)* %70, align 16, !invariant.load !0 %72 = bitcast float %41 to i32 %73 = insertelement <4 x i32> %63, i32 0, i32 2 %74 = insertelement <4 x i32> %73, i32 %72, i32 1 %75 = bitcast <4 x i32> %74 to <4 x float> %76 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %75, <8 x i32> %69, <4 x i32> %71, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) #3 %77 = extractelement <4 x float> %76, i32 1 %78 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 20, !amdgpu.uniform !0 %79 = load <8 x i32>, <8 x i32> addrspace(2)* %78, align 32, !invariant.load !0 %80 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 43, !amdgpu.uniform !0 %81 = load <4 x i32>, <4 x i32> addrspace(2)* %80, align 16, !invariant.load !0 %82 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %75, <8 x i32> %79, <4 x i32> %81, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) #3 %83 = extractelement <4 x float> %82, i32 2 %84 = bitcast float %43 to i32 %85 = bitcast float %54 to i32 %86 = insertelement <4 x i32> undef, i32 %84, i32 0 %87 = insertelement <4 x i32> %86, i32 1065353216, i32 2 %88 = insertelement <4 x i32> %87, i32 %85, i32 1 %89 = bitcast <4 x i32> %88 to <4 x float> %90 = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %89, <8 x i32> %57, <4 x i32> %60, i32 1, i1 false, i1 false, i1 false, i1 false, i1 true) %91 = bitcast float %55 to i32 %92 = insertelement <4 x i32> %86, i32 1065353216, i32 2 %93 = insertelement <4 x i32> %92, i32 %91, i32 1 %94 = bitcast <4 x i32> %93 to <4 x float> %95 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %94, <8 x i32> %69, <4 x i32> %71, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) #3 %96 = extractelement <4 x float> %95, i32 1 %97 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %94, <8 x i32> %79, <4 x i32> %81, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) #3 %98 = extractelement <4 x float> %97, i32 2 %99 = call nsz float @llvm.rint.f32(float %27) #3 %100 = call nsz float @llvm.rint.f32(float %30) #3 %101 = fsub nsz float %99, %27 %102 = fsub nsz float %100, %30 %103 = call nsz float @llvm.fabs.f32(float %101) #4 %104 = fmul nsz float %103, 2.000000e+00 %105 = call nsz float @llvm.fabs.f32(float %102) #4 %106 = fmul nsz float %105, 2.000000e+00 %107 = fsub nsz float 1.000000e+00, %104 %108 = fmul nsz float %67, %104 %109 = fmul nsz float %90, %107 %110 = fadd nsz float %108, %109 %111 = fsub nsz float 1.000000e+00, %106 %112 = fmul nsz float %77, %106 %113 = fmul nsz float %96, %111 %114 = fadd nsz float %112, %113 %115 = fmul nsz float %83, %106 %116 = fmul nsz float %98, %111 %117 = fadd nsz float %115, %116 %118 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %119 = load <4 x i32>, <4 x i32> addrspace(2)* %118, align 16, !invariant.load !0 %120 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 0) %121 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 4) %122 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 8) %123 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 12) %124 = fmul nsz float %120, %110 %125 = fmul nsz float %121, %114 %126 = fadd nsz float %124, %125 %127 = fmul nsz float %122, %117 %128 = fadd nsz float %126, %127 %129 = fadd nsz float %128, %123 %130 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 16) %131 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 20) %132 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 24) %133 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 28) %134 = fmul nsz float %130, %110 %135 = fmul nsz float %131, %114 %136 = fadd nsz float %134, %135 %137 = fmul nsz float %132, %117 %138 = fadd nsz float %136, %137 %139 = fadd nsz float %138, %133 %140 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 32) %141 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 36) %142 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 40) %143 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 44) %144 = fmul nsz float %140, %110 %145 = fmul nsz float %141, %114 %146 = fadd nsz float %144, %145 %147 = fmul nsz float %142, %117 %148 = fadd nsz float %146, %147 %149 = fadd nsz float %148, %143 %150 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 48) %151 = fcmp nsz ole float %117, %150 %152 = select i1 %151, float 1.000000e+00, float 0.000000e+00 %153 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %119, i32 52) %154 = fcmp nsz ogt float %117, %153 %155 = select i1 %154, float 1.000000e+00, float 0.000000e+00 %156 = call nsz float @llvm.maxnum.f32(float %155, float %152) #3 %157 = bitcast float %3 to i32 %158 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %157, 6 %159 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %158, float %129, 7 %160 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %159, float %139, 8 %161 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %160, float %149, 9 %162 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %161, float %156, 10 %163 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %162, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %163 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.rint.f32(float) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } attributes #4 = { nounwind } !0 = !{} FRAG DCL IN[0], GENERIC[1], LINEAR DCL IN[1], GENERIC[2], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D_ARRAY, FLOAT DCL SVIEW[1], 2D_ARRAY, FLOAT DCL SVIEW[2], 2D_ARRAY, FLOAT DCL TEMP[0..4] IMM[0] FLT32 { -0.5000, 0.0000, 0.5000, 1.0000} IMM[1] FLT32 { 2.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[1].x, IN[0] 1: ADD TEMP[1].yz, IN[0], IMM[0].xxxx 2: ROUND TEMP[1].yz, TEMP[1] 3: MOV TEMP[1].w, IMM[0].yyyy 4: ADD TEMP[1].yz, TEMP[1], IMM[0].zzzz 5: MUL TEMP[1].y, TEMP[1], IN[0].wwww 6: MUL TEMP[1].z, TEMP[1], IN[1].wwww 7: MOV TEMP[3].x, IN[1] 8: ADD TEMP[3].yz, IN[1], IMM[0].xxxx 9: ROUND TEMP[3].yz, TEMP[3] 10: MOV TEMP[3].w, IMM[0].wwww 11: ADD TEMP[3].yz, TEMP[3], IMM[0].zzzz 12: MUL TEMP[3].y, TEMP[3], IN[0].wwww 13: MUL TEMP[3].z, TEMP[3], IN[1].wwww 14: TEX TEMP[2].x, TEMP[1].xyww, SAMP[0], 2D_ARRAY 15: TEX TEMP[2].y, TEMP[1].xzww, SAMP[1], 2D_ARRAY 16: TEX TEMP[2].z, TEMP[1].xzww, SAMP[2], 2D_ARRAY 17: TEX TEMP[4].x, TEMP[3].xyww, SAMP[0], 2D_ARRAY 18: TEX TEMP[4].y, TEMP[3].xzww, SAMP[1], 2D_ARRAY 19: TEX TEMP[4].z, TEMP[3].xzww, SAMP[2], 2D_ARRAY 20: ROUND TEMP[1].yz, IN[0] 21: ADD TEMP[1].yz, TEMP[1], -IN[0] 22: MUL TEMP[1].yz, |TEMP[1]|, IMM[1].xxxx 23: LRP TEMP[0], TEMP[1].yzzz, TEMP[2], TEMP[4] 24: MOV OUT[0].x, TEMP[0] 25: END radeonsi: Compiling shader 6 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %10 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #3 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #3 %26 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 0, i32 %4) #3 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %23, i32 1, i32 0, i32 %4) #3 %28 = fadd nsz float %27, -5.000000e-01 %29 = call nsz float @llvm.rint.f32(float %28) #3 %30 = fadd nsz float %29, 5.000000e-01 %31 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 3, i32 0, i32 %4) #3 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %23, i32 3, i32 0, i32 %4) #3 %33 = fmul nsz float %30, %32 %34 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 1, i32 %4) #3 %35 = call nsz float @llvm.amdgcn.interp.p2(float %34, float %23, i32 0, i32 1, i32 %4) #3 %36 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 1, i32 %4) #3 %37 = call nsz float @llvm.amdgcn.interp.p2(float %36, float %23, i32 1, i32 1, i32 %4) #3 %38 = fadd nsz float %37, -5.000000e-01 %39 = call nsz float @llvm.rint.f32(float %38) #3 %40 = fadd nsz float %39, 5.000000e-01 %41 = fmul nsz float %40, %32 %42 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !invariant.load !0 %44 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %45 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %44, i64 0, i64 35, !amdgpu.uniform !0 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !invariant.load !0 %47 = bitcast float %25 to i32 %48 = bitcast float %33 to i32 %49 = insertelement <4 x i32> , i32 %47, i32 0 %50 = insertelement <4 x i32> %49, i32 %48, i32 1 %51 = bitcast <4 x i32> %50 to <4 x float> %52 = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %51, <8 x i32> %43, <4 x i32> %46, i32 1, i1 false, i1 false, i1 false, i1 false, i1 true) %53 = bitcast float %35 to i32 %54 = bitcast float %41 to i32 %55 = insertelement <4 x i32> , i32 %53, i32 0 %56 = insertelement <4 x i32> %55, i32 %54, i32 1 %57 = bitcast <4 x i32> %56 to <4 x float> %58 = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %57, <8 x i32> %43, <4 x i32> %46, i32 1, i1 false, i1 false, i1 false, i1 false, i1 true) %59 = call nsz float @llvm.rint.f32(float %27) #3 %60 = fsub nsz float %59, %27 %61 = call nsz float @llvm.fabs.f32(float %60) #4 %62 = fmul nsz float %61, 2.000000e+00 %63 = fsub nsz float 1.000000e+00, %62 %64 = fmul nsz float %52, %62 %65 = fmul nsz float %58, %63 %66 = fadd nsz float %64, %65 %67 = bitcast float %3 to i32 %68 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %67, 6 %69 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %68, float %66, 7 %70 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %69, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %70 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.rint.f32(float) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } attributes #4 = { nounwind } !0 = !{} FRAG DCL IN[0], GENERIC[1], LINEAR DCL IN[1], GENERIC[2], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D_ARRAY, FLOAT DCL SVIEW[1], 2D_ARRAY, FLOAT DCL SVIEW[2], 2D_ARRAY, FLOAT DCL TEMP[0..4] IMM[0] FLT32 { -0.5000, 0.0000, 0.5000, 1.0000} IMM[1] FLT32 { 2.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[1].x, IN[0] 1: ADD TEMP[1].yz, IN[0], IMM[0].xxxx 2: ROUND TEMP[1].yz, TEMP[1] 3: MOV TEMP[1].w, IMM[0].yyyy 4: ADD TEMP[1].yz, TEMP[1], IMM[0].zzzz 5: MUL TEMP[1].y, TEMP[1], IN[0].wwww 6: MUL TEMP[1].z, TEMP[1], IN[1].wwww 7: MOV TEMP[3].x, IN[1] 8: ADD TEMP[3].yz, IN[1], IMM[0].xxxx 9: ROUND TEMP[3].yz, TEMP[3] 10: MOV TEMP[3].w, IMM[0].wwww 11: ADD TEMP[3].yz, TEMP[3], IMM[0].zzzz 12: MUL TEMP[3].y, TEMP[3], IN[0].wwww 13: MUL TEMP[3].z, TEMP[3], IN[1].wwww 14: TEX TEMP[2].x, TEMP[1].xyww, SAMP[0], 2D_ARRAY 15: TEX TEMP[2].y, TEMP[1].xzww, SAMP[1], 2D_ARRAY 16: TEX TEMP[2].z, TEMP[1].xzww, SAMP[2], 2D_ARRAY 17: TEX TEMP[4].x, TEMP[3].xyww, SAMP[0], 2D_ARRAY 18: TEX TEMP[4].y, TEMP[3].xzww, SAMP[1], 2D_ARRAY 19: TEX TEMP[4].z, TEMP[3].xzww, SAMP[2], 2D_ARRAY 20: ROUND TEMP[1].yz, IN[0] 21: ADD TEMP[1].yz, TEMP[1], -IN[0] 22: MUL TEMP[1].yz, |TEMP[1]|, IMM[1].xxxx 23: LRP TEMP[0], TEMP[1].yzzz, TEMP[2], TEMP[4] 24: MOV OUT[0].xy, TEMP[0].yzww 25: END radeonsi: Compiling shader 7 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %10 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #3 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #3 %26 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 2, i32 0, i32 %4) #3 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %23, i32 2, i32 0, i32 %4) #3 %28 = fadd nsz float %27, -5.000000e-01 %29 = call nsz float @llvm.rint.f32(float %28) #3 %30 = fadd nsz float %29, 5.000000e-01 %31 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 3, i32 1, i32 %4) #3 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %23, i32 3, i32 1, i32 %4) #3 %33 = fmul nsz float %30, %32 %34 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 1, i32 %4) #3 %35 = call nsz float @llvm.amdgcn.interp.p2(float %34, float %23, i32 0, i32 1, i32 %4) #3 %36 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 2, i32 1, i32 %4) #3 %37 = call nsz float @llvm.amdgcn.interp.p2(float %36, float %23, i32 2, i32 1, i32 %4) #3 %38 = fadd nsz float %37, -5.000000e-01 %39 = call nsz float @llvm.rint.f32(float %38) #3 %40 = fadd nsz float %39, 5.000000e-01 %41 = fmul nsz float %40, %32 %42 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !invariant.load !0 %44 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %45 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %44, i64 0, i64 39, !amdgpu.uniform !0 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !invariant.load !0 %47 = bitcast float %25 to i32 %48 = bitcast float %33 to i32 %49 = insertelement <4 x i32> , i32 %47, i32 0 %50 = insertelement <4 x i32> %49, i32 %48, i32 1 %51 = bitcast <4 x i32> %50 to <4 x float> %52 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %51, <8 x i32> %43, <4 x i32> %46, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) #3 %53 = extractelement <4 x float> %52, i32 1 %54 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 20, !amdgpu.uniform !0 %55 = load <8 x i32>, <8 x i32> addrspace(2)* %54, align 32, !invariant.load !0 %56 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %44, i64 0, i64 43, !amdgpu.uniform !0 %57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 16, !invariant.load !0 %58 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %51, <8 x i32> %55, <4 x i32> %57, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) #3 %59 = extractelement <4 x float> %58, i32 2 %60 = bitcast float %35 to i32 %61 = bitcast float %41 to i32 %62 = insertelement <4 x i32> , i32 %60, i32 0 %63 = insertelement <4 x i32> %62, i32 %61, i32 1 %64 = bitcast <4 x i32> %63 to <4 x float> %65 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %64, <8 x i32> %43, <4 x i32> %46, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) #3 %66 = extractelement <4 x float> %65, i32 1 %67 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %64, <8 x i32> %55, <4 x i32> %57, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) #3 %68 = extractelement <4 x float> %67, i32 2 %69 = call nsz float @llvm.rint.f32(float %27) #3 %70 = fsub nsz float %69, %27 %71 = call nsz float @llvm.fabs.f32(float %70) #4 %72 = fmul nsz float %71, 2.000000e+00 %73 = fsub nsz float 1.000000e+00, %72 %74 = fmul nsz float %53, %72 %75 = fmul nsz float %66, %73 %76 = fadd nsz float %74, %75 %77 = fmul nsz float %59, %72 %78 = fmul nsz float %68, %73 %79 = fadd nsz float %77, %78 %80 = bitcast float %3 to i32 %81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %80, 6 %82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %76, 7 %83 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82, float %79, 8 %84 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %83, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %84 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.rint.f32(float) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.fabs.f32(float) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } attributes #4 = { nounwind } !0 = !{} FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 1D, FLOAT DCL CONST[0..2] DCL TEMP[0] 0: TEX TEMP[0], IN[0], SAMP[0], 2D 1: MOV OUT[0].w, TEMP[0] 2: TEX TEMP[0], TEMP[0], SAMP[1], 1D 3: DP4 OUT[0].x, CONST[0], TEMP[0] 4: DP4 OUT[0].y, CONST[1], TEMP[0] 5: DP4 OUT[0].z, CONST[2], TEMP[0] 6: END radeonsi: Compiling shader 8 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %22 = load <8 x i32>, <8 x i32> addrspace(2)* %21, align 32, !invariant.load !0 %23 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %24 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %23, i64 0, i64 35, !amdgpu.uniform !0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !invariant.load !0 %26 = bitcast <2 x i32> %10 to <2 x float> %27 = extractelement <2 x float> %26, i32 0 %28 = extractelement <2 x float> %26, i32 1 %29 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 0, i32 0, i32 %4) #3 %30 = call nsz float @llvm.amdgcn.interp.p2(float %29, float %28, i32 0, i32 0, i32 %4) #3 %31 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 1, i32 0, i32 %4) #3 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %28, i32 1, i32 0, i32 %4) #3 %33 = bitcast float %30 to i32 %34 = bitcast float %32 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = bitcast <2 x i32> %36 to <2 x float> %38 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %37, <8 x i32> %22, <4 x i32> %25, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #3 %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 3 %41 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !invariant.load !0 %43 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %23, i64 0, i64 39, !amdgpu.uniform !0 %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !invariant.load !0 %45 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %39, <8 x i32> %42, <4 x i32> %44, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #3 %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !invariant.load !0 %52 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 0) %53 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 4) %54 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 8) %55 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 12) %56 = fmul nsz float %52, %46 %57 = fmul nsz float %53, %47 %58 = fadd nsz float %56, %57 %59 = fmul nsz float %54, %48 %60 = fadd nsz float %58, %59 %61 = fmul nsz float %55, %49 %62 = fadd nsz float %60, %61 %63 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 16) %64 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 20) %65 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 24) %66 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 28) %67 = fmul nsz float %63, %46 %68 = fmul nsz float %64, %47 %69 = fadd nsz float %67, %68 %70 = fmul nsz float %65, %48 %71 = fadd nsz float %69, %70 %72 = fmul nsz float %66, %49 %73 = fadd nsz float %71, %72 %74 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 32) %75 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 36) %76 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 40) %77 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %51, i32 44) %78 = fmul nsz float %74, %46 %79 = fmul nsz float %75, %47 %80 = fadd nsz float %78, %79 %81 = fmul nsz float %76, %48 %82 = fadd nsz float %80, %81 %83 = fmul nsz float %77, %49 %84 = fadd nsz float %82, %83 %85 = bitcast float %3 to i32 %86 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %85, 6 %87 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %86, float %62, 7 %88 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %87, float %73, 8 %89 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %88, float %84, 9 %90 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %89, float %40, 10 %91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %90, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #3 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } !0 = !{} FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 1D, FLOAT DCL TEMP[0] 0: TEX TEMP[0], IN[0], SAMP[0], 2D 1: MOV OUT[0].w, TEMP[0] 2: TEX OUT[0].xyz, TEMP[0], SAMP[1], 1D 3: END radeonsi: Compiling shader 9 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %22 = load <8 x i32>, <8 x i32> addrspace(2)* %21, align 32, !invariant.load !0 %23 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %24 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %23, i64 0, i64 35, !amdgpu.uniform !0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !invariant.load !0 %26 = bitcast <2 x i32> %10 to <2 x float> %27 = extractelement <2 x float> %26, i32 0 %28 = extractelement <2 x float> %26, i32 1 %29 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 0, i32 0, i32 %4) #3 %30 = call nsz float @llvm.amdgcn.interp.p2(float %29, float %28, i32 0, i32 0, i32 %4) #3 %31 = call nsz float @llvm.amdgcn.interp.p1(float %27, i32 1, i32 0, i32 %4) #3 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %28, i32 1, i32 0, i32 %4) #3 %33 = bitcast float %30 to i32 %34 = bitcast float %32 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = bitcast <2 x i32> %36 to <2 x float> %38 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %37, <8 x i32> %22, <4 x i32> %25, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #3 %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 3 %41 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !invariant.load !0 %43 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %23, i64 0, i64 39, !amdgpu.uniform !0 %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !invariant.load !0 %45 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %39, <8 x i32> %42, <4 x i32> %44, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #3 %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = bitcast float %3 to i32 %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %49, 6 %51 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %50, float %46, 7 %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %51, float %47, 8 %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52, float %48, 9 %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %40, 10 %55 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %55 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } !0 = !{} FRAG DCL IN[0], GENERIC[0], LINEAR DCL IN[1], COLOR, LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0] 0: TEX TEMP[0], IN[0], SAMP[0], 2D 1: MUL OUT[0], TEMP[0], IN[1] 2: END radeonsi: Compiling shader 10 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32, float, float, float, float) #0 { main_body: %25 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !invariant.load !0 %27 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %28 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %27, i64 0, i64 35, !amdgpu.uniform !0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !invariant.load !0 %30 = bitcast <2 x i32> %10 to <2 x float> %31 = extractelement <2 x float> %30, i32 0 %32 = extractelement <2 x float> %30, i32 1 %33 = call nsz float @llvm.amdgcn.interp.p1(float %31, i32 0, i32 0, i32 %4) #3 %34 = call nsz float @llvm.amdgcn.interp.p2(float %33, float %32, i32 0, i32 0, i32 %4) #3 %35 = call nsz float @llvm.amdgcn.interp.p1(float %31, i32 1, i32 0, i32 %4) #3 %36 = call nsz float @llvm.amdgcn.interp.p2(float %35, float %32, i32 1, i32 0, i32 %4) #3 %37 = bitcast float %34 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = bitcast <2 x i32> %40 to <2 x float> %42 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %41, <8 x i32> %26, <4 x i32> %29, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #3 %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = fmul nsz float %43, %21 %48 = fmul nsz float %44, %22 %49 = fmul nsz float %45, %23 %50 = fmul nsz float %46, %24 %51 = bitcast float %3 to i32 %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %51, 6 %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52, float %47, 7 %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %48, 8 %55 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54, float %49, 9 %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %55, float %50, 10 %57 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %57 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } !0 = !{} FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 11 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %4) #2 %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 1, i32 0, i32 %4) #2 %23 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 2, i32 0, i32 %4) #2 %24 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 3, i32 0, i32 %4) #2 %25 = bitcast float %3 to i32 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %25, 6 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %26, float %21, 7 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %27, float %22, 8 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 9 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 10 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readnone } VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END radeonsi: Compiling shader 12 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs void @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) #0 { main_body: %14 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 0, !amdgpu.uniform !0 %15 = load <4 x i32>, <4 x i32> addrspace(2)* %14, align 16, !invariant.load !0 %16 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %15, i32 %12, i32 0, i1 false, i1 false) #3 %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 1, !amdgpu.uniform !0 %22 = load <4 x i32>, <4 x i32> addrspace(2)* %21, align 16, !invariant.load !0 %23 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %22, i32 %13, i32 0, i1 false, i1 false) #3 %24 = extractelement <4 x float> %23, i32 0 %25 = extractelement <4 x float> %23, i32 1 %26 = extractelement <4 x float> %23, i32 2 %27 = extractelement <4 x float> %23, i32 3 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %24, float %25, float %26, float %27, i1 false, i1 false) #2 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %17, float %18, float %19, float %20, i1 true, i1 false) #2 ret void } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #2 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind } attributes #3 = { nounwind readnone } !0 = !{} radeonsi: Compiling shader 13 Vertex Shader Prolog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> @vs_prolog(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %16 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> undef, i32 %0, 0 %17 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %16, i32 %1, 1 %18 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %17, i32 %2, 2 %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %18, i32 %3, 3 %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %19, i32 %4, 4 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %20, i32 %5, 5 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %21, i32 %6, 6 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %22, i32 %7, 7 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %23, i32 %8, 8 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %24, i32 %9, 9 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %25, i32 %10, 10 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %26, i32 %11, 11 %28 = bitcast i32 %12 to float %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %27, float %28, 12 %30 = bitcast i32 %13 to float %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %29, float %30, 13 %32 = bitcast i32 %14 to float %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %31, float %32, 14 %34 = bitcast i32 %15 to float %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %33, float %34, 15 %36 = add i32 %12, %8 %37 = bitcast i32 %36 to float %38 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %35, float %37, 16 %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %38, float %37, 17 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %39 } attributes #0 = { "no-signed-zeros-fp-math"="true" } SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[4:7], s[6:7], 0x10 ; C00A0103 00000010 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v5, s[4:7], 0 idxen ; E00C2000 80010005 buffer_load_format_xyzw v[4:7], v4, s[0:3], 0 idxen ; E00C2000 80000404 s_waitcnt vmcnt(1) ; BF8C0F71 exp param0 v0, v1, v2, v3 ; C400020F 03020100 s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v4, v5, v6, v7 done ; C40008CF 07060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 72 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** radeonsi: Compiling shader 14 Fragment Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { main_body: %18 = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float %4, float %5) #3 %19 = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float %6, float %7) #3 %20 = bitcast <2 x half> %18 to <2 x i16> %21 = bitcast <2 x half> %19 to <2 x i16> call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 15, <2 x i16> %20, <2 x i16> %21, i1 true, i1 true) #2 ret void } ; Function Attrs: nounwind readnone speculatable declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind } attributes #3 = { nounwind readnone } SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b32 m0, s7 ; BEFC0007 v_interp_mov_f32 v0, p0, attr0.x ; D4020002 v_interp_mov_f32 v1, p0, attr0.y ; D4060102 v_interp_mov_f32 v2, p0, attr0.z ; D40A0202 v_interp_mov_f32 v3, p0, attr0.w ; D40E0302 Shader epilog disassembly: v_cvt_pkrtz_f16_f32 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32 v1, v2, v3 ; D2960001 00020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 10 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 48 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1] 0: F2I TEMP[1], IN[0] 1: TXF_LZ TEMP[0], TEMP[1], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END radeonsi: Compiling shader 15 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %10 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #3 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #3 %26 = fptosi float %25 to i32 %27 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 0, i32 %4) #3 %28 = call nsz float @llvm.amdgcn.interp.p2(float %27, float %23, i32 1, i32 0, i32 %4) #3 %29 = fptosi float %28 to i32 %30 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !invariant.load !0 %32 = insertelement <2 x i32> undef, i32 %26, i32 0 %33 = insertelement <2 x i32> %32, i32 %29, i32 1 %34 = call nsz <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %33, <8 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false) #3 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = bitcast float %3 to i32 %40 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %39, 6 %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 7 %42 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %36, 8 %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %37, 9 %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43, float %38, 10 %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } !0 = !{} SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[4:7], s[6:7], 0x10 ; C00A0103 00000010 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v5, s[4:7], 0 idxen ; E00C2000 80010005 buffer_load_format_xyzw v[4:7], v4, s[0:3], 0 idxen ; E00C2000 80000404 s_waitcnt vmcnt(1) ; BF8C0F71 exp param0 v0, v1, v2, v3 ; C400020F 03020100 s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v4, v5, v6, v7 done ; C40008CF 07060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 72 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v8, attr0.x ; D4000008 v_interp_p1_f32 v1, v8, attr0.y ; D4040108 v_interp_p2_f32 v0, v9, attr0.x ; D4010009 v_interp_p2_f32 v1, v9, attr0.y ; D4050109 s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 s_waitcnt lgkmcnt(0) ; BF8C007F image_load v[0:3], v[0:1], s[8:15] dmask:0xf unorm ; F0001F00 00020000 s_waitcnt vmcnt(0) ; BF8C0F70 Shader epilog disassembly: v_cvt_pkrtz_f16_f32 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32 v1, v2, v3 ; D2960001 00020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1].xy, GENERIC[0] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MOV TEMP[0].xy, IN[0].xyxx 2: MOV OUT[0], TEMP[0] 3: MOV OUT[1].xy, IN[1].xyxx 4: END radeonsi: Compiling shader 16 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs void @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) #0 { main_body: %14 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 0, !amdgpu.uniform !0 %15 = load <4 x i32>, <4 x i32> addrspace(2)* %14, align 16, !invariant.load !0 %16 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %15, i32 %12, i32 0, i1 false, i1 false) #3 %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 1, !amdgpu.uniform !0 %20 = load <4 x i32>, <4 x i32> addrspace(2)* %19, align 16, !invariant.load !0 %21 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %20, i32 %13, i32 0, i1 false, i1 false) #3 %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %22, float %23, float undef, float undef, i1 false, i1 false) #2 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %17, float %18, float 1.000000e+00, float 1.000000e+00, i1 true, i1 false) #2 ret void } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #2 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind } attributes #3 = { nounwind readnone } !0 = !{} FRAG DCL IN[0].xy, GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..1] DCL TEMP[0..87], LOCAL IMM[0] FLT32 { 1.0000, 34.0000, 0.0035, 289.0000} IMM[1] FLT32 { 0.0244, 6.2832, 16.0000, 0.2500} IMM[2] FLT32 { 0.0039, 0.0059, -0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyxx 1: MOV TEMP[0].z, CONST[0].xxxx 2: ADD TEMP[1].xyz, TEMP[0].xyzz, IMM[0].xxxx 3: MAD TEMP[2].x, IMM[0].yyyy, TEMP[1].xxxx, IMM[0].xxxx 4: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[1].xxxx 5: MUL TEMP[4].x, TEMP[3].xxxx, IMM[0].zzzz 6: FLR TEMP[5].x, TEMP[4].xxxx 7: MUL TEMP[6].x, TEMP[5].xxxx, IMM[0].wwww 8: ADD TEMP[7].x, TEMP[3].xxxx, -TEMP[6].xxxx 9: ADD TEMP[8].x, TEMP[7].xxxx, TEMP[1].yyyy 10: MAD TEMP[9].x, IMM[0].yyyy, TEMP[8].xxxx, IMM[0].xxxx 11: MUL TEMP[10].x, TEMP[9].xxxx, TEMP[8].xxxx 12: MUL TEMP[11].x, TEMP[10].xxxx, IMM[0].zzzz 13: FLR TEMP[12].x, TEMP[11].xxxx 14: MUL TEMP[13].x, TEMP[12].xxxx, IMM[0].wwww 15: ADD TEMP[14].x, TEMP[10].xxxx, -TEMP[13].xxxx 16: ADD TEMP[15].x, TEMP[14].xxxx, TEMP[1].zzzz 17: MAD TEMP[16].x, IMM[0].yyyy, TEMP[15].xxxx, IMM[0].xxxx 18: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[15].xxxx 19: MUL TEMP[18].x, TEMP[17].xxxx, IMM[0].zzzz 20: FLR TEMP[19].x, TEMP[18].xxxx 21: MUL TEMP[20].x, TEMP[19].xxxx, IMM[0].wwww 22: ADD TEMP[21].x, TEMP[17].xxxx, -TEMP[20].xxxx 23: MOV TEMP[22].xy, IN[0].xyyy 24: TEX TEMP[23], TEMP[22], SAMP[0], 2D 25: MAD TEMP[24].x, IMM[0].yyyy, TEMP[21].xxxx, IMM[0].xxxx 26: MUL TEMP[25].x, TEMP[24].xxxx, TEMP[21].xxxx 27: MUL TEMP[26].x, TEMP[25].xxxx, IMM[0].zzzz 28: FLR TEMP[27].x, TEMP[26].xxxx 29: MUL TEMP[28].x, TEMP[27].xxxx, IMM[0].wwww 30: ADD TEMP[29].x, TEMP[25].xxxx, -TEMP[28].xxxx 31: MUL TEMP[30].x, TEMP[29].xxxx, IMM[1].xxxx 32: FRC TEMP[31].x, TEMP[30].xxxx 33: MUL TEMP[32].x, TEMP[31].xxxx, IMM[1].yyyy 34: MAD TEMP[33].x, IMM[0].yyyy, TEMP[29].xxxx, IMM[0].xxxx 35: MUL TEMP[34].x, TEMP[33].xxxx, TEMP[29].xxxx 36: MUL TEMP[35].x, TEMP[34].xxxx, IMM[0].zzzz 37: FLR TEMP[36].x, TEMP[35].xxxx 38: MUL TEMP[37].x, TEMP[36].xxxx, IMM[0].wwww 39: ADD TEMP[38].x, TEMP[34].xxxx, -TEMP[37].xxxx 40: COS TEMP[39].x, TEMP[32].xxxx 41: SIN TEMP[40].x, TEMP[32].xxxx 42: MOV TEMP[39].y, TEMP[40].xxxx 43: MUL TEMP[41].x, TEMP[21].xxxx, IMM[1].xxxx 44: FRC TEMP[42].x, TEMP[41].xxxx 45: MUL TEMP[43].x, TEMP[42].xxxx, IMM[1].zzzz 46: MUL TEMP[44].xy, TEMP[43].xxxx, TEMP[39].xyyy 47: MOV TEMP[45].x, -TEMP[44].yyyy 48: MOV TEMP[45].y, TEMP[44].xxxx 49: MOV TEMP[46].x, -TEMP[44].xxxx 50: MOV TEMP[46].y, -TEMP[44].yyyy 51: MOV TEMP[47].x, TEMP[44].yyyy 52: MOV TEMP[47].y, -TEMP[44].xxxx 53: MAD TEMP[48].xy, CONST[1].xyyy, TEMP[44].xyyy, IN[0].xyyy 54: MOV TEMP[49].xy, TEMP[48].xyyy 55: TEX TEMP[50], TEMP[49], SAMP[0], 2D 56: MAD TEMP[51].xy, CONST[1].xyyy, TEMP[45].xyyy, IN[0].xyyy 57: MOV TEMP[52].xy, TEMP[51].xyyy 58: TEX TEMP[53], TEMP[52], SAMP[0], 2D 59: ADD TEMP[54], TEMP[50], TEMP[53] 60: MAD TEMP[55].xy, CONST[1].xyyy, TEMP[46].xyyy, IN[0].xyyy 61: MOV TEMP[56].xy, TEMP[55].xyyy 62: TEX TEMP[57], TEMP[56], SAMP[0], 2D 63: ADD TEMP[58], TEMP[54], TEMP[57] 64: MAD TEMP[59].xy, CONST[1].xyyy, TEMP[47].xyyy, IN[0].xyyy 65: MOV TEMP[60].xy, TEMP[59].xyyy 66: TEX TEMP[61], TEMP[60], SAMP[0], 2D 67: ADD TEMP[62], TEMP[58], TEMP[61] 68: MUL TEMP[63], TEMP[62], IMM[1].wwww 69: ADD TEMP[64], TEMP[23], -TEMP[63] 70: MOV TEMP[65], |TEMP[64]| 71: FSLT TEMP[66], IMM[2].xxxx, TEMP[65] 72: UCMP TEMP[67].xyz, TEMP[66], TEMP[23], TEMP[63] 73: MUL TEMP[68].x, TEMP[38].xxxx, IMM[1].xxxx 74: FRC TEMP[69].x, TEMP[68].xxxx 75: MAD TEMP[70].x, IMM[0].yyyy, TEMP[38].xxxx, IMM[0].xxxx 76: MUL TEMP[71].x, TEMP[70].xxxx, TEMP[38].xxxx 77: MUL TEMP[72].x, TEMP[71].xxxx, IMM[0].zzzz 78: FLR TEMP[73].x, TEMP[72].xxxx 79: MUL TEMP[74].x, TEMP[73].xxxx, IMM[0].wwww 80: ADD TEMP[75].x, TEMP[71].xxxx, -TEMP[74].xxxx 81: MUL TEMP[76].x, TEMP[75].xxxx, IMM[1].xxxx 82: FRC TEMP[77].x, TEMP[76].xxxx 83: MOV TEMP[69].y, TEMP[77].xxxx 84: MAD TEMP[78].x, IMM[0].yyyy, TEMP[75].xxxx, IMM[0].xxxx 85: MUL TEMP[79].x, TEMP[78].xxxx, TEMP[75].xxxx 86: MUL TEMP[80].x, TEMP[79].xxxx, IMM[0].zzzz 87: FLR TEMP[81].x, TEMP[80].xxxx 88: MUL TEMP[82].x, TEMP[81].xxxx, IMM[0].wwww 89: ADD TEMP[83].x, TEMP[79].xxxx, -TEMP[82].xxxx 90: MUL TEMP[84].x, TEMP[83].xxxx, IMM[1].xxxx 91: FRC TEMP[85].x, TEMP[84].xxxx 92: MOV TEMP[69].z, TEMP[85].xxxx 93: ADD TEMP[86].xyz, TEMP[69].xyzz, IMM[2].zzzz 94: MAD TEMP[87].x, IMM[2].yyyy, TEMP[86].xyzz, TEMP[67].xyzz 95: MOV TEMP[67].x, TEMP[87].xxxx 96: MOV TEMP[67].y, IMM[2].wwww 97: MOV TEMP[67].z, IMM[2].wwww 98: MOV TEMP[67].w, IMM[0].xxxx 99: MOV OUT[0], TEMP[67] 100: END radeonsi: Compiling shader 17 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %6 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #2 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #2 %26 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 0, i32 %4) #2 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %23, i32 1, i32 0, i32 %4) #2 %28 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !invariant.load !0 %30 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 0) %31 = fadd nsz float %25, 1.000000e+00 %32 = fadd nsz float %27, 1.000000e+00 %33 = fadd nsz float %30, 1.000000e+00 %34 = fmul nsz float %31, 3.400000e+01 %35 = fadd nsz float %34, 1.000000e+00 %36 = fmul nsz float %35, %31 %37 = fmul nsz float %36, 0x3F6C5894E0000000 %38 = call nsz float @llvm.floor.f32(float %37) #2 %39 = fmul nsz float %38, 2.890000e+02 %40 = fsub nsz float %36, %39 %41 = fadd nsz float %40, %32 %42 = fmul nsz float %41, 3.400000e+01 %43 = fadd nsz float %42, 1.000000e+00 %44 = fmul nsz float %43, %41 %45 = fmul nsz float %44, 0x3F6C5894E0000000 %46 = call nsz float @llvm.floor.f32(float %45) #2 %47 = fmul nsz float %46, 2.890000e+02 %48 = fsub nsz float %44, %47 %49 = fadd nsz float %48, %33 %50 = fmul nsz float %49, 3.400000e+01 %51 = fadd nsz float %50, 1.000000e+00 %52 = fmul nsz float %51, %49 %53 = fmul nsz float %52, 0x3F6C5894E0000000 %54 = call nsz float @llvm.floor.f32(float %53) #2 %55 = fmul nsz float %54, 2.890000e+02 %56 = fsub nsz float %52, %55 %57 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %58 = load <8 x i32>, <8 x i32> addrspace(2)* %57, align 32, !invariant.load !0 %59 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %60 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %59, i64 0, i64 35, !amdgpu.uniform !0 %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !invariant.load !0 %62 = bitcast float %25 to i32 %63 = bitcast float %27 to i32 %64 = insertelement <2 x i32> undef, i32 %62, i32 0 %65 = insertelement <2 x i32> %64, i32 %63, i32 1 %66 = bitcast <2 x i32> %65 to <2 x float> %67 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %66, <8 x i32> %58, <4 x i32> %61, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %68 = fmul nsz float %56, 3.400000e+01 %69 = fadd nsz float %68, 1.000000e+00 %70 = fmul nsz float %69, %56 %71 = fmul nsz float %70, 0x3F6C5894E0000000 %72 = call nsz float @llvm.floor.f32(float %71) #2 %73 = fmul nsz float %72, 2.890000e+02 %74 = fsub nsz float %70, %73 %75 = fmul nsz float %74, 0x3F98F9C180000000 %76 = call nsz float @llvm.floor.f32(float %75) #2 %77 = fsub nsz float %75, %76 %78 = fmul nsz float %77, 0x401921FB60000000 %79 = fmul nsz float %74, 3.400000e+01 %80 = fadd nsz float %79, 1.000000e+00 %81 = fmul nsz float %80, %74 %82 = fmul nsz float %81, 0x3F6C5894E0000000 %83 = call nsz float @llvm.floor.f32(float %82) #2 %84 = fmul nsz float %83, 2.890000e+02 %85 = fsub nsz float %81, %84 %86 = call nsz float @llvm.cos.f32(float %78) #2 %87 = call nsz float @llvm.sin.f32(float %78) #2 %88 = fmul nsz float %56, 0x3F98F9C180000000 %89 = call nsz float @llvm.floor.f32(float %88) #2 %90 = fsub nsz float %88, %89 %91 = fmul nsz float %90, 1.600000e+01 %92 = fmul nsz float %91, %86 %93 = fmul nsz float %91, %87 %94 = fsub nsz float -0.000000e+00, %93 %95 = fsub nsz float -0.000000e+00, %92 %96 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 16) %97 = fmul nsz float %96, %92 %98 = fadd nsz float %97, %25 %99 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 20) %100 = fmul nsz float %99, %93 %101 = fadd nsz float %100, %27 %102 = bitcast float %98 to i32 %103 = bitcast float %101 to i32 %104 = insertelement <2 x i32> undef, i32 %102, i32 0 %105 = insertelement <2 x i32> %104, i32 %103, i32 1 %106 = bitcast <2 x i32> %105 to <2 x float> %107 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %106, <8 x i32> %58, <4 x i32> %61, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %108 = fmul nsz float %96, %94 %109 = fadd nsz float %108, %25 %110 = fmul nsz float %99, %92 %111 = fadd nsz float %110, %27 %112 = bitcast float %109 to i32 %113 = bitcast float %111 to i32 %114 = insertelement <2 x i32> undef, i32 %112, i32 0 %115 = insertelement <2 x i32> %114, i32 %113, i32 1 %116 = bitcast <2 x i32> %115 to <2 x float> %117 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %116, <8 x i32> %58, <4 x i32> %61, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %118 = fadd nsz float %107, %117 %119 = fmul nsz float %96, %95 %120 = fadd nsz float %119, %25 %121 = fmul nsz float %99, %94 %122 = fadd nsz float %121, %27 %123 = bitcast float %120 to i32 %124 = bitcast float %122 to i32 %125 = insertelement <2 x i32> undef, i32 %123, i32 0 %126 = insertelement <2 x i32> %125, i32 %124, i32 1 %127 = bitcast <2 x i32> %126 to <2 x float> %128 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %127, <8 x i32> %58, <4 x i32> %61, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %129 = fadd nsz float %118, %128 %130 = fmul nsz float %96, %93 %131 = fadd nsz float %130, %25 %132 = fmul nsz float %99, %95 %133 = fadd nsz float %132, %27 %134 = bitcast float %131 to i32 %135 = bitcast float %133 to i32 %136 = insertelement <2 x i32> undef, i32 %134, i32 0 %137 = insertelement <2 x i32> %136, i32 %135, i32 1 %138 = bitcast <2 x i32> %137 to <2 x float> %139 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %138, <8 x i32> %58, <4 x i32> %61, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %140 = fadd nsz float %129, %139 %141 = fmul nsz float %140, 2.500000e-01 %142 = fsub nsz float %67, %141 %143 = call nsz float @llvm.fabs.f32(float %142) #4 %144 = fcmp nsz ogt float %143, 0x3F6FFF79C0000000 %145 = select i1 %144, float %67, float %141 %146 = fmul nsz float %85, 0x3F98F9C180000000 %147 = call nsz float @llvm.floor.f32(float %146) #2 %148 = fsub nsz float %146, %147 %149 = fadd nsz float %148, -5.000000e-01 %150 = fmul nsz float %149, 0x3F77FF9B60000000 %151 = fadd nsz float %150, %145 %152 = bitcast float %3 to i32 %153 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %152, 6 %154 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %153, float %151, 7 %155 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %154, float 0.000000e+00, 8 %156 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %155, float 0.000000e+00, 9 %157 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %156, float 1.000000e+00, 10 %158 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %157, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %158 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readnone } attributes #3 = { nounwind readonly } attributes #4 = { nounwind } !0 = !{} SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[4:7], s[6:7], 0x10 ; C00A0103 00000010 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v5, s[4:7], 0 idxen ; E00C2000 80010005 s_waitcnt vmcnt(0) ; BF8C0F70 buffer_load_format_xyzw v[2:5], v4, s[0:3], 0 idxen ; E00C2000 80000204 exp param0 v0, v1, v0, v0 ; C400020F 00000100 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v2, v3, v0, v0 done ; C40008CF 00000302 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** radeonsi: Compiling shader 18 Fragment Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { main_body: %18 = call nsz float @llvm.maxnum.f32(float %4, float 0.000000e+00) #3 %19 = call nsz float @llvm.minnum.f32(float %18, float 1.000000e+00) #3 %20 = fmul nsz float %19, 6.553500e+04 %21 = fadd nsz float %20, 5.000000e-01 %22 = fptoui float %21 to i32 %23 = call nsz float @llvm.maxnum.f32(float %5, float 0.000000e+00) #3 %24 = call nsz float @llvm.minnum.f32(float %23, float 1.000000e+00) #3 %25 = fmul nsz float %24, 6.553500e+04 %26 = fadd nsz float %25, 5.000000e-01 %27 = fptoui float %26 to i32 %28 = call nsz float @llvm.maxnum.f32(float %6, float 0.000000e+00) #3 %29 = call nsz float @llvm.minnum.f32(float %28, float 1.000000e+00) #3 %30 = fmul nsz float %29, 6.553500e+04 %31 = fadd nsz float %30, 5.000000e-01 %32 = fptoui float %31 to i32 %33 = call nsz float @llvm.maxnum.f32(float %7, float 0.000000e+00) #3 %34 = call nsz float @llvm.minnum.f32(float %33, float 1.000000e+00) #3 %35 = fmul nsz float %34, 6.553500e+04 %36 = fadd nsz float %35, 5.000000e-01 %37 = fptoui float %36 to i32 %38 = shl i32 %27, 16 %39 = or i32 %38, %22 %40 = shl i32 %37, 16 %41 = or i32 %40, %32 %42 = bitcast i32 %39 to <2 x i16> %43 = bitcast i32 %41 to <2 x i16> call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 15, <2 x i16> %42, <2 x i16> %43, i1 true, i1 true) #2 ret void } ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind } attributes #3 = { nounwind readnone } SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x5 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[16:17], exec ; BE90017E s_wqm_b64 exec, exec ; BEFE077E s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 v_interp_p1_f32 v1, v2, attr0.y ; D4040102 v_add_f32_e32 v2, 1.0, v0 ; 020400F2 v_mov_b32_e32 v5, 0x42080000 ; 7E0A02FF 42080000 v_mad_f32 v6, v2, v5, 1.0 ; D1C10006 03CA0B02 s_load_dwordx4 s[0:3], s[2:3], 0x100 ; C00A0001 00000100 v_mul_f32_e32 v2, v2, v6 ; 0A040D02 v_mov_b32_e32 v6, 0x3b62c4a7 ; 7E0C02FF 3B62C4A7 v_mul_f32_e32 v7, v6, v2 ; 0A0E0506 v_interp_p2_f32 v1, v3, attr0.y ; D4050103 v_floor_f32_e32 v7, v7 ; 7E0E3F07 v_mov_b32_e32 v8, 0xc3908000 ; 7E1002FF C3908000 v_add_f32_e32 v3, 1.0, v1 ; 020602F2 v_mac_f32_e32 v2, v8, v7 ; 2C040F08 v_add_f32_e32 v2, v3, v2 ; 02040503 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s7, s[0:3], 0x0 ; C02201C0 00000000 v_mad_f32 v3, v2, v5, 1.0 ; D1C10003 03CA0B02 v_mul_f32_e32 v7, v2, v3 ; 0A0E0702 v_mul_f32_e32 v7, v6, v7 ; 0A0E0F06 v_floor_f32_e32 v7, v7 ; 7E0E3F07 v_mul_f32_e32 v7, v8, v7 ; 0A0E0F08 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v4, s7, 1.0 ; D1010004 0001E407 v_mac_f32_e32 v7, v2, v3 ; 2C0E0702 v_add_f32_e32 v2, v4, v7 ; 02040F04 v_mad_f32 v3, v2, v5, 1.0 ; D1C10003 03CA0B02 v_mul_f32_e32 v4, v2, v3 ; 0A080702 v_mul_f32_e32 v4, v6, v4 ; 0A080906 v_floor_f32_e32 v4, v4 ; 7E083F04 v_mul_f32_e32 v4, v8, v4 ; 0A080908 v_mac_f32_e32 v4, v2, v3 ; 2C080702 v_mad_f32 v2, v4, v5, 1.0 ; D1C10002 03CA0B04 v_mul_f32_e32 v3, v4, v2 ; 0A060504 v_mul_f32_e32 v3, v6, v3 ; 0A060706 v_floor_f32_e32 v3, v3 ; 7E063F03 v_mul_f32_e32 v3, v8, v3 ; 0A060708 v_mac_f32_e32 v3, v4, v2 ; 2C060504 v_mov_b32_e32 v9, 0x3cc7ce0c ; 7E1202FF 3CC7CE0C v_mul_f32_e32 v2, v9, v3 ; 0A040709 v_floor_f32_e32 v2, v2 ; 7E043F02 v_mad_f32 v5, v3, v5, 1.0 ; D1C10005 03CA0B03 v_mad_f32 v2, v3, v9, -v2 ; D1C10002 840A1303 v_mul_f32_e32 v10, v3, v5 ; 0A140B03 v_mul_f32_e32 v2, 0x40c90fdb, v2 ; 0A0404FF 40C90FDB v_mul_f32_e32 v6, v6, v10 ; 0A0C1506 v_floor_f32_e32 v6, v6 ; 7E0C3F06 v_mul_f32_e32 v2, 0.15915494, v2 ; 0A0404F8 v_mul_f32_e32 v6, v8, v6 ; 0A0C0D08 v_fract_f32_e32 v2, v2 ; 7E043702 v_mac_f32_e32 v6, v3, v5 ; 2C0C0B03 v_mul_f32_e32 v5, v9, v4 ; 0A0A0909 v_cos_f32_e32 v3, v2 ; 7E065502 s_buffer_load_dword s18, s[0:3], 0x10 ; C0220480 00000010 s_buffer_load_dword s19, s[0:3], 0x14 ; C02204C0 00000014 v_sin_f32_e32 v2, v2 ; 7E045302 v_floor_f32_e32 v5, v5 ; 7E0A3F05 s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 s_load_dwordx4 s[0:3], s[4:5], 0x230 ; C00A0002 00000230 v_mad_f32 v4, v4, v9, -v5 ; D1C10004 84161304 v_mul_f32_e32 v4, 0x41800000, v4 ; 0A0808FF 41800000 v_mul_f32_e32 v5, v3, v4 ; 0A0A0903 v_mul_f32_e32 v4, v2, v4 ; 0A080902 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, s18, v5, v0 ; D1C10002 04020A12 v_mad_f32 v3, s19, v4, v1 ; D1C10003 04060813 image_sample v8, v[2:3], s[8:15], s[0:3] dmask:0x1 ; F0800100 00020802 v_mad_f32 v2, -s18, v4, v0 ; D1C10002 24020812 v_mad_f32 v3, s19, v5, v1 ; D1C10003 04060A13 image_sample v2, v[2:3], s[8:15], s[0:3] dmask:0x1 ; F0800100 00020202 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v8, v2, v8 ; 02101102 v_mad_f32 v2, -s18, v5, v0 ; D1C10002 24020A12 v_mad_f32 v3, -s19, v4, v1 ; D1C10003 24060813 image_sample v2, v[2:3], s[8:15], s[0:3] dmask:0x1 ; F0800100 00020202 image_sample v7, v[0:1], s[8:15], s[0:3] dmask:0x1 ; F0800100 00020700 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v2, v2, v8 ; 02041102 v_mac_f32_e32 v0, s18, v4 ; 2C000812 v_mad_f32 v1, -s19, v5, v1 ; D1C10001 24060A13 s_and_b64 exec, exec, s[16:17] ; 86FE107E image_sample v0, v[0:1], s[8:15], s[0:3] dmask:0x1 ; F0800100 00020000 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v0, v0, v2 ; 02000500 v_mov_b32_e32 v1, 0x3e800000 ; 7E0202FF 3E800000 v_mul_f32_e32 v2, v1, v0 ; 0A040101 v_mad_f32 v0, -v0, v1, v7 ; D1C10000 241E0300 v_mov_b32_e32 v1, 0x3b7ffbce ; 7E0202FF 3B7FFBCE v_cmp_gt_f32_e64 vcc, |v0|, v1 ; D044016A 00020300 v_mul_f32_e32 v1, v9, v6 ; 0A020D09 v_floor_f32_e32 v1, v1 ; 7E023F01 v_mad_f32 v1, v6, v9, -v1 ; D1C10001 84061306 v_cndmask_b32_e32 v0, v2, v7, vcc ; 00000F02 v_add_f32_e32 v1, -0.5, v1 ; 020202F1 v_mac_f32_e32 v0, 0x3bbffcdb, v1 ; 2C0002FF 3BBFFCDB v_mov_b32_e32 v1, 0 ; 7E020280 v_mov_b32_e32 v2, 0 ; 7E040280 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 Shader epilog disassembly: v_max_f32_e64 v0, v0, v0 clamp ; D10B8000 00020100 v_mov_b32_e32 v4, 0x477fff00 ; 7E0802FF 477FFF00 v_max_f32_e64 v1, v1, v1 clamp ; D10B8001 00020301 v_max_f32_e64 v2, v2, v2 clamp ; D10B8002 00020502 v_max_f32_e64 v3, v3, v3 clamp ; D10B8003 00020703 v_mad_f32 v0, v0, v4, 0.5 ; D1C10000 03C20900 v_mad_f32 v1, v1, v4, 0.5 ; D1C10001 03C20901 v_mad_f32 v2, v2, v4, 0.5 ; D1C10002 03C20902 v_mad_f32 v3, v3, v4, 0.5 ; D1C10003 03C20903 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 v_cvt_u32_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E020EF9 06060501 v_cvt_u32_f32_e32 v2, v2 ; 7E040F02 v_cvt_u32_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E060EF9 06060503 v_or_b32_e32 v0, v0, v1 ; 28000300 v_or_b32_e32 v1, v2, v3 ; 28020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 676 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** FRAG DCL IN[0].xy, GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..1] DCL TEMP[0..87], LOCAL IMM[0] FLT32 { 1.0000, 34.0000, 0.0035, 289.0000} IMM[1] FLT32 { 0.0244, 6.2832, 16.0000, 0.2500} IMM[2] FLT32 { 0.0039, 0.0059, -0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyxx 1: MOV TEMP[0].z, CONST[0].xxxx 2: ADD TEMP[1].xyz, TEMP[0].xyzz, IMM[0].xxxx 3: MAD TEMP[2].x, IMM[0].yyyy, TEMP[1].xxxx, IMM[0].xxxx 4: MUL TEMP[3].x, TEMP[2].xxxx, TEMP[1].xxxx 5: MUL TEMP[4].x, TEMP[3].xxxx, IMM[0].zzzz 6: FLR TEMP[5].x, TEMP[4].xxxx 7: MUL TEMP[6].x, TEMP[5].xxxx, IMM[0].wwww 8: ADD TEMP[7].x, TEMP[3].xxxx, -TEMP[6].xxxx 9: ADD TEMP[8].x, TEMP[7].xxxx, TEMP[1].yyyy 10: MAD TEMP[9].x, IMM[0].yyyy, TEMP[8].xxxx, IMM[0].xxxx 11: MUL TEMP[10].x, TEMP[9].xxxx, TEMP[8].xxxx 12: MUL TEMP[11].x, TEMP[10].xxxx, IMM[0].zzzz 13: FLR TEMP[12].x, TEMP[11].xxxx 14: MUL TEMP[13].x, TEMP[12].xxxx, IMM[0].wwww 15: ADD TEMP[14].x, TEMP[10].xxxx, -TEMP[13].xxxx 16: ADD TEMP[15].x, TEMP[14].xxxx, TEMP[1].zzzz 17: MAD TEMP[16].x, IMM[0].yyyy, TEMP[15].xxxx, IMM[0].xxxx 18: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[15].xxxx 19: MUL TEMP[18].x, TEMP[17].xxxx, IMM[0].zzzz 20: FLR TEMP[19].x, TEMP[18].xxxx 21: MUL TEMP[20].x, TEMP[19].xxxx, IMM[0].wwww 22: ADD TEMP[21].x, TEMP[17].xxxx, -TEMP[20].xxxx 23: MOV TEMP[22].xy, IN[0].xyyy 24: TEX TEMP[23], TEMP[22], SAMP[0], 2D 25: MAD TEMP[24].x, IMM[0].yyyy, TEMP[21].xxxx, IMM[0].xxxx 26: MUL TEMP[25].x, TEMP[24].xxxx, TEMP[21].xxxx 27: MUL TEMP[26].x, TEMP[25].xxxx, IMM[0].zzzz 28: FLR TEMP[27].x, TEMP[26].xxxx 29: MUL TEMP[28].x, TEMP[27].xxxx, IMM[0].wwww 30: ADD TEMP[29].x, TEMP[25].xxxx, -TEMP[28].xxxx 31: MUL TEMP[30].x, TEMP[29].xxxx, IMM[1].xxxx 32: FRC TEMP[31].x, TEMP[30].xxxx 33: MUL TEMP[32].x, TEMP[31].xxxx, IMM[1].yyyy 34: MAD TEMP[33].x, IMM[0].yyyy, TEMP[29].xxxx, IMM[0].xxxx 35: MUL TEMP[34].x, TEMP[33].xxxx, TEMP[29].xxxx 36: MUL TEMP[35].x, TEMP[34].xxxx, IMM[0].zzzz 37: FLR TEMP[36].x, TEMP[35].xxxx 38: MUL TEMP[37].x, TEMP[36].xxxx, IMM[0].wwww 39: ADD TEMP[38].x, TEMP[34].xxxx, -TEMP[37].xxxx 40: COS TEMP[39].x, TEMP[32].xxxx 41: SIN TEMP[40].x, TEMP[32].xxxx 42: MOV TEMP[39].y, TEMP[40].xxxx 43: MUL TEMP[41].x, TEMP[21].xxxx, IMM[1].xxxx 44: FRC TEMP[42].x, TEMP[41].xxxx 45: MUL TEMP[43].x, TEMP[42].xxxx, IMM[1].zzzz 46: MUL TEMP[44].xy, TEMP[43].xxxx, TEMP[39].xyyy 47: MOV TEMP[45].x, -TEMP[44].yyyy 48: MOV TEMP[45].y, TEMP[44].xxxx 49: MOV TEMP[46].x, -TEMP[44].xxxx 50: MOV TEMP[46].y, -TEMP[44].yyyy 51: MOV TEMP[47].x, TEMP[44].yyyy 52: MOV TEMP[47].y, -TEMP[44].xxxx 53: MAD TEMP[48].xy, CONST[1].xyyy, TEMP[44].xyyy, IN[0].xyyy 54: MOV TEMP[49].xy, TEMP[48].xyyy 55: TEX TEMP[50], TEMP[49], SAMP[0], 2D 56: MAD TEMP[51].xy, CONST[1].xyyy, TEMP[45].xyyy, IN[0].xyyy 57: MOV TEMP[52].xy, TEMP[51].xyyy 58: TEX TEMP[53], TEMP[52], SAMP[0], 2D 59: ADD TEMP[54], TEMP[50], TEMP[53] 60: MAD TEMP[55].xy, CONST[1].xyyy, TEMP[46].xyyy, IN[0].xyyy 61: MOV TEMP[56].xy, TEMP[55].xyyy 62: TEX TEMP[57], TEMP[56], SAMP[0], 2D 63: ADD TEMP[58], TEMP[54], TEMP[57] 64: MAD TEMP[59].xy, CONST[1].xyyy, TEMP[47].xyyy, IN[0].xyyy 65: MOV TEMP[60].xy, TEMP[59].xyyy 66: TEX TEMP[61], TEMP[60], SAMP[0], 2D 67: ADD TEMP[62], TEMP[58], TEMP[61] 68: MUL TEMP[63], TEMP[62], IMM[1].wwww 69: ADD TEMP[64], TEMP[23], -TEMP[63] 70: MOV TEMP[65], |TEMP[64]| 71: FSLT TEMP[66], IMM[2].xxxx, TEMP[65] 72: UCMP TEMP[67].xyz, TEMP[66], TEMP[23], TEMP[63] 73: MUL TEMP[68].x, TEMP[38].xxxx, IMM[1].xxxx 74: FRC TEMP[69].x, TEMP[68].xxxx 75: MAD TEMP[70].x, IMM[0].yyyy, TEMP[38].xxxx, IMM[0].xxxx 76: MUL TEMP[71].x, TEMP[70].xxxx, TEMP[38].xxxx 77: MUL TEMP[72].x, TEMP[71].xxxx, IMM[0].zzzz 78: FLR TEMP[73].x, TEMP[72].xxxx 79: MUL TEMP[74].x, TEMP[73].xxxx, IMM[0].wwww 80: ADD TEMP[75].x, TEMP[71].xxxx, -TEMP[74].xxxx 81: MUL TEMP[76].x, TEMP[75].xxxx, IMM[1].xxxx 82: FRC TEMP[77].x, TEMP[76].xxxx 83: MOV TEMP[69].y, TEMP[77].xxxx 84: MAD TEMP[78].x, IMM[0].yyyy, TEMP[75].xxxx, IMM[0].xxxx 85: MUL TEMP[79].x, TEMP[78].xxxx, TEMP[75].xxxx 86: MUL TEMP[80].x, TEMP[79].xxxx, IMM[0].zzzz 87: FLR TEMP[81].x, TEMP[80].xxxx 88: MUL TEMP[82].x, TEMP[81].xxxx, IMM[0].wwww 89: ADD TEMP[83].x, TEMP[79].xxxx, -TEMP[82].xxxx 90: MUL TEMP[84].x, TEMP[83].xxxx, IMM[1].xxxx 91: FRC TEMP[85].x, TEMP[84].xxxx 92: MOV TEMP[69].z, TEMP[85].xxxx 93: ADD TEMP[86].xyz, TEMP[69].xyzz, IMM[2].zzzz 94: MAD TEMP[87].xy, IMM[2].yyyy, TEMP[86].xyzz, TEMP[67].xyzz 95: MOV TEMP[67].xy, TEMP[87].xyxx 96: MOV TEMP[67].z, IMM[2].wwww 97: MOV TEMP[67].w, IMM[0].xxxx 98: MOV OUT[0], TEMP[67] 99: END radeonsi: Compiling shader 19 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %6 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #2 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #2 %26 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 0, i32 %4) #2 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %23, i32 1, i32 0, i32 %4) #2 %28 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !invariant.load !0 %30 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 0) %31 = fadd nsz float %25, 1.000000e+00 %32 = fadd nsz float %27, 1.000000e+00 %33 = fadd nsz float %30, 1.000000e+00 %34 = fmul nsz float %31, 3.400000e+01 %35 = fadd nsz float %34, 1.000000e+00 %36 = fmul nsz float %35, %31 %37 = fmul nsz float %36, 0x3F6C5894E0000000 %38 = call nsz float @llvm.floor.f32(float %37) #2 %39 = fmul nsz float %38, 2.890000e+02 %40 = fsub nsz float %36, %39 %41 = fadd nsz float %40, %32 %42 = fmul nsz float %41, 3.400000e+01 %43 = fadd nsz float %42, 1.000000e+00 %44 = fmul nsz float %43, %41 %45 = fmul nsz float %44, 0x3F6C5894E0000000 %46 = call nsz float @llvm.floor.f32(float %45) #2 %47 = fmul nsz float %46, 2.890000e+02 %48 = fsub nsz float %44, %47 %49 = fadd nsz float %48, %33 %50 = fmul nsz float %49, 3.400000e+01 %51 = fadd nsz float %50, 1.000000e+00 %52 = fmul nsz float %51, %49 %53 = fmul nsz float %52, 0x3F6C5894E0000000 %54 = call nsz float @llvm.floor.f32(float %53) #2 %55 = fmul nsz float %54, 2.890000e+02 %56 = fsub nsz float %52, %55 %57 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %58 = load <8 x i32>, <8 x i32> addrspace(2)* %57, align 32, !invariant.load !0 %59 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %60 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %59, i64 0, i64 35, !amdgpu.uniform !0 %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !invariant.load !0 %62 = bitcast float %25 to i32 %63 = bitcast float %27 to i32 %64 = insertelement <2 x i32> undef, i32 %62, i32 0 %65 = insertelement <2 x i32> %64, i32 %63, i32 1 %66 = bitcast <2 x i32> %65 to <2 x float> %67 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %66, <8 x i32> %58, <4 x i32> %61, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = fmul nsz float %56, 3.400000e+01 %71 = fadd nsz float %70, 1.000000e+00 %72 = fmul nsz float %71, %56 %73 = fmul nsz float %72, 0x3F6C5894E0000000 %74 = call nsz float @llvm.floor.f32(float %73) #2 %75 = fmul nsz float %74, 2.890000e+02 %76 = fsub nsz float %72, %75 %77 = fmul nsz float %76, 0x3F98F9C180000000 %78 = call nsz float @llvm.floor.f32(float %77) #2 %79 = fsub nsz float %77, %78 %80 = fmul nsz float %79, 0x401921FB60000000 %81 = fmul nsz float %76, 3.400000e+01 %82 = fadd nsz float %81, 1.000000e+00 %83 = fmul nsz float %82, %76 %84 = fmul nsz float %83, 0x3F6C5894E0000000 %85 = call nsz float @llvm.floor.f32(float %84) #2 %86 = fmul nsz float %85, 2.890000e+02 %87 = fsub nsz float %83, %86 %88 = call nsz float @llvm.cos.f32(float %80) #2 %89 = call nsz float @llvm.sin.f32(float %80) #2 %90 = fmul nsz float %56, 0x3F98F9C180000000 %91 = call nsz float @llvm.floor.f32(float %90) #2 %92 = fsub nsz float %90, %91 %93 = fmul nsz float %92, 1.600000e+01 %94 = fmul nsz float %93, %88 %95 = fmul nsz float %93, %89 %96 = fsub nsz float -0.000000e+00, %95 %97 = fsub nsz float -0.000000e+00, %94 %98 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 16) %99 = fmul nsz float %98, %94 %100 = fadd nsz float %99, %25 %101 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %29, i32 20) %102 = fmul nsz float %101, %95 %103 = fadd nsz float %102, %27 %104 = bitcast float %100 to i32 %105 = bitcast float %103 to i32 %106 = insertelement <2 x i32> undef, i32 %104, i32 0 %107 = insertelement <2 x i32> %106, i32 %105, i32 1 %108 = bitcast <2 x i32> %107 to <2 x float> %109 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %108, <8 x i32> %58, <4 x i32> %61, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = fmul nsz float %98, %96 %113 = fadd nsz float %112, %25 %114 = fmul nsz float %101, %94 %115 = fadd nsz float %114, %27 %116 = bitcast float %113 to i32 %117 = bitcast float %115 to i32 %118 = insertelement <2 x i32> undef, i32 %116, i32 0 %119 = insertelement <2 x i32> %118, i32 %117, i32 1 %120 = bitcast <2 x i32> %119 to <2 x float> %121 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %120, <8 x i32> %58, <4 x i32> %61, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %122 = extractelement <4 x float> %121, i32 0 %123 = extractelement <4 x float> %121, i32 1 %124 = fadd nsz float %110, %122 %125 = fadd nsz float %111, %123 %126 = fmul nsz float %98, %97 %127 = fadd nsz float %126, %25 %128 = fmul nsz float %101, %96 %129 = fadd nsz float %128, %27 %130 = bitcast float %127 to i32 %131 = bitcast float %129 to i32 %132 = insertelement <2 x i32> undef, i32 %130, i32 0 %133 = insertelement <2 x i32> %132, i32 %131, i32 1 %134 = bitcast <2 x i32> %133 to <2 x float> %135 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %134, <8 x i32> %58, <4 x i32> %61, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %136 = extractelement <4 x float> %135, i32 0 %137 = extractelement <4 x float> %135, i32 1 %138 = fadd nsz float %124, %136 %139 = fadd nsz float %125, %137 %140 = fmul nsz float %98, %95 %141 = fadd nsz float %140, %25 %142 = fmul nsz float %101, %97 %143 = fadd nsz float %142, %27 %144 = bitcast float %141 to i32 %145 = bitcast float %143 to i32 %146 = insertelement <2 x i32> undef, i32 %144, i32 0 %147 = insertelement <2 x i32> %146, i32 %145, i32 1 %148 = bitcast <2 x i32> %147 to <2 x float> %149 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %148, <8 x i32> %58, <4 x i32> %61, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %150 = extractelement <4 x float> %149, i32 0 %151 = extractelement <4 x float> %149, i32 1 %152 = fadd nsz float %138, %150 %153 = fadd nsz float %139, %151 %154 = fmul nsz float %152, 2.500000e-01 %155 = fmul nsz float %153, 2.500000e-01 %156 = fsub nsz float %68, %154 %157 = fsub nsz float %69, %155 %158 = call nsz float @llvm.fabs.f32(float %156) #4 %159 = call nsz float @llvm.fabs.f32(float %157) #4 %160 = fcmp nsz ogt float %158, 0x3F6FFF79C0000000 %161 = fcmp nsz ogt float %159, 0x3F6FFF79C0000000 %162 = select i1 %160, float %68, float %154 %163 = select i1 %161, float %69, float %155 %164 = fmul nsz float %87, 0x3F98F9C180000000 %165 = call nsz float @llvm.floor.f32(float %164) #2 %166 = fsub nsz float %164, %165 %167 = fmul nsz float %87, 3.400000e+01 %168 = fadd nsz float %167, 1.000000e+00 %169 = fmul nsz float %168, %87 %170 = fmul nsz float %169, 0x3F6C5894E0000000 %171 = call nsz float @llvm.floor.f32(float %170) #2 %172 = fmul nsz float %171, 2.890000e+02 %173 = fsub nsz float %169, %172 %174 = fmul nsz float %173, 0x3F98F9C180000000 %175 = call nsz float @llvm.floor.f32(float %174) #2 %176 = fsub nsz float %174, %175 %177 = fadd nsz float %166, -5.000000e-01 %178 = fadd nsz float %176, -5.000000e-01 %179 = fmul nsz float %177, 0x3F77FF9B60000000 %180 = fadd nsz float %179, %162 %181 = fmul nsz float %178, 0x3F77FF9B60000000 %182 = fadd nsz float %181, %163 %183 = bitcast float %3 to i32 %184 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %183, 6 %185 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %184, float %180, 7 %186 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %185, float %182, 8 %187 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %186, float 0.000000e+00, 9 %188 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %187, float 1.000000e+00, 10 %189 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %188, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %189 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.fabs.f32(float) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readnone } attributes #3 = { nounwind readonly } attributes #4 = { nounwind } !0 = !{} SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[4:7], s[6:7], 0x10 ; C00A0103 00000010 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v5, s[4:7], 0 idxen ; E00C2000 80010005 s_waitcnt vmcnt(0) ; BF8C0F70 buffer_load_format_xyzw v[2:5], v4, s[0:3], 0 idxen ; E00C2000 80000204 exp param0 v0, v1, v0, v0 ; C400020F 00000100 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v2, v3, v0, v0 done ; C40008CF 00000302 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x5 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[16:17], exec ; BE90017E s_wqm_b64 exec, exec ; BEFE077E s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 v_interp_p1_f32 v1, v2, attr0.y ; D4040102 v_add_f32_e32 v2, 1.0, v0 ; 020400F2 v_mov_b32_e32 v8, 0x42080000 ; 7E1002FF 42080000 v_mad_f32 v5, v2, v8, 1.0 ; D1C10005 03CA1102 s_load_dwordx4 s[0:3], s[2:3], 0x100 ; C00A0001 00000100 v_mul_f32_e32 v2, v2, v5 ; 0A040B02 v_mov_b32_e32 v9, 0x3b62c4a7 ; 7E1202FF 3B62C4A7 v_mul_f32_e32 v5, v9, v2 ; 0A0A0509 v_interp_p2_f32 v1, v3, attr0.y ; D4050103 v_floor_f32_e32 v5, v5 ; 7E0A3F05 v_mov_b32_e32 v10, 0xc3908000 ; 7E1402FF C3908000 v_add_f32_e32 v3, 1.0, v1 ; 020602F2 v_mac_f32_e32 v2, v10, v5 ; 2C040B0A v_add_f32_e32 v2, v3, v2 ; 02040503 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s7, s[0:3], 0x0 ; C02201C0 00000000 v_mad_f32 v3, v2, v8, 1.0 ; D1C10003 03CA1102 v_mul_f32_e32 v5, v2, v3 ; 0A0A0702 v_mul_f32_e32 v5, v9, v5 ; 0A0A0B09 v_floor_f32_e32 v5, v5 ; 7E0A3F05 v_mul_f32_e32 v5, v10, v5 ; 0A0A0B0A s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v4, s7, 1.0 ; D1010004 0001E407 v_mac_f32_e32 v5, v2, v3 ; 2C0A0702 v_add_f32_e32 v2, v4, v5 ; 02040B04 v_mad_f32 v3, v2, v8, 1.0 ; D1C10003 03CA1102 v_mul_f32_e32 v4, v2, v3 ; 0A080702 v_mul_f32_e32 v4, v9, v4 ; 0A080909 v_floor_f32_e32 v4, v4 ; 7E083F04 v_mul_f32_e32 v4, v10, v4 ; 0A08090A v_mac_f32_e32 v4, v2, v3 ; 2C080702 v_mad_f32 v5, v4, v8, 1.0 ; D1C10005 03CA1104 v_mul_f32_e32 v6, v4, v5 ; 0A0C0B04 v_mul_f32_e32 v6, v9, v6 ; 0A0C0D09 v_floor_f32_e32 v6, v6 ; 7E0C3F06 v_mul_f32_e32 v6, v10, v6 ; 0A0C0D0A v_mac_f32_e32 v6, v4, v5 ; 2C0C0B04 v_mov_b32_e32 v11, 0x3cc7ce0c ; 7E1602FF 3CC7CE0C v_mul_f32_e32 v5, v11, v6 ; 0A0A0D0B v_floor_f32_e32 v5, v5 ; 7E0A3F05 v_mad_f32 v7, v6, v8, 1.0 ; D1C10007 03CA1106 v_mad_f32 v5, v6, v11, -v5 ; D1C10005 84161706 v_mul_f32_e32 v12, v6, v7 ; 0A180F06 v_mul_f32_e32 v5, 0x40c90fdb, v5 ; 0A0A0AFF 40C90FDB v_mul_f32_e32 v12, v9, v12 ; 0A181909 v_floor_f32_e32 v12, v12 ; 7E183F0C v_mul_f32_e32 v5, 0.15915494, v5 ; 0A0A0AF8 v_mul_f32_e32 v12, v10, v12 ; 0A18190A v_fract_f32_e32 v5, v5 ; 7E0A3705 v_mac_f32_e32 v12, v6, v7 ; 2C180F06 v_mul_f32_e32 v7, v11, v4 ; 0A0E090B v_cos_f32_e32 v6, v5 ; 7E0C5505 s_buffer_load_dword s18, s[0:3], 0x10 ; C0220480 00000010 s_buffer_load_dword s19, s[0:3], 0x14 ; C02204C0 00000014 v_sin_f32_e32 v5, v5 ; 7E0A5305 v_floor_f32_e32 v7, v7 ; 7E0E3F07 s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 s_load_dwordx4 s[0:3], s[4:5], 0x230 ; C00A0002 00000230 v_mad_f32 v4, v4, v11, -v7 ; D1C10004 841E1704 v_mul_f32_e32 v4, 0x41800000, v4 ; 0A0808FF 41800000 v_mul_f32_e32 v15, v5, v4 ; 0A1E0905 v_mul_f32_e32 v14, v6, v4 ; 0A1C0906 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s18, v14, v0 ; D1C10004 04021C12 v_mad_f32 v5, s19, v15, v1 ; D1C10005 04061E13 v_mad_f32 v6, -s18, v15, v0 ; D1C10006 24021E12 v_mad_f32 v7, s19, v14, v1 ; D1C10007 04061C13 image_sample v[4:5], v[4:5], s[8:15], s[0:3] dmask:0x3 ; F0800300 00020404 image_sample v[6:7], v[6:7], s[8:15], s[0:3] dmask:0x3 ; F0800300 00020606 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v6, v6, v4 ; 020C0906 v_add_f32_e32 v7, v7, v5 ; 020E0B07 v_mad_f32 v4, -s18, v14, v0 ; D1C10004 24021C12 v_mad_f32 v5, -s19, v15, v1 ; D1C10005 24061E13 image_sample v[4:5], v[4:5], s[8:15], s[0:3] dmask:0x3 ; F0800300 00020404 image_sample v[2:3], v[0:1], s[8:15], s[0:3] dmask:0x3 ; F0800300 00020200 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v4, v4, v6 ; 02080D04 v_add_f32_e32 v5, v5, v7 ; 020A0F05 v_mac_f32_e32 v0, s18, v15 ; 2C001E12 v_mad_f32 v1, -s19, v14, v1 ; D1C10001 24061C13 s_and_b64 exec, exec, s[16:17] ; 86FE107E image_sample v[0:1], v[0:1], s[8:15], s[0:3] dmask:0x3 ; F0800300 00020000 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v0, v0, v4 ; 02000900 v_add_f32_e32 v1, v1, v5 ; 02020B01 v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000 v_mul_f32_e32 v5, v4, v0 ; 0A0A0104 v_mul_f32_e32 v6, v4, v1 ; 0A0C0304 v_mad_f32 v0, -v0, v4, v2 ; D1C10000 240A0900 v_mad_f32 v1, -v1, v4, v3 ; D1C10001 240E0901 v_mov_b32_e32 v4, 0x3b7ffbce ; 7E0802FF 3B7FFBCE v_cmp_gt_f32_e64 s[0:1], |v1|, v4 ; D0440100 00020901 v_cndmask_b32_e64 v1, v6, v3, s[0:1] ; D1000001 00020706 v_mad_f32 v3, v12, v8, 1.0 ; D1C10003 03CA110C v_cmp_gt_f32_e64 vcc, |v0|, v4 ; D044016A 00020900 v_mul_f32_e32 v4, v12, v3 ; 0A08070C v_mul_f32_e32 v4, v9, v4 ; 0A080909 v_floor_f32_e32 v4, v4 ; 7E083F04 v_mul_f32_e32 v4, v10, v4 ; 0A08090A v_mac_f32_e32 v4, v12, v3 ; 2C08070C v_mul_f32_e32 v3, v11, v4 ; 0A06090B v_cndmask_b32_e32 v0, v5, v2, vcc ; 00000505 v_mul_f32_e32 v2, v11, v12 ; 0A04190B v_floor_f32_e32 v2, v2 ; 7E043F02 v_floor_f32_e32 v3, v3 ; 7E063F03 v_mad_f32 v3, v4, v11, -v3 ; D1C10003 840E1704 v_mad_f32 v2, v12, v11, -v2 ; D1C10002 840A170C v_add_f32_e32 v2, -0.5, v2 ; 020404F1 v_mov_b32_e32 v4, 0x3bbffcdb ; 7E0802FF 3BBFFCDB v_add_f32_e32 v3, -0.5, v3 ; 020606F1 v_mac_f32_e32 v0, v4, v2 ; 2C000504 v_mac_f32_e32 v1, v4, v3 ; 2C020704 v_mov_b32_e32 v2, 0 ; 7E040280 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 Shader epilog disassembly: v_max_f32_e64 v0, v0, v0 clamp ; D10B8000 00020100 v_mov_b32_e32 v4, 0x477fff00 ; 7E0802FF 477FFF00 v_max_f32_e64 v1, v1, v1 clamp ; D10B8001 00020301 v_max_f32_e64 v2, v2, v2 clamp ; D10B8002 00020502 v_max_f32_e64 v3, v3, v3 clamp ; D10B8003 00020703 v_mad_f32 v0, v0, v4, 0.5 ; D1C10000 03C20900 v_mad_f32 v1, v1, v4, 0.5 ; D1C10001 03C20901 v_mad_f32 v2, v2, v4, 0.5 ; D1C10002 03C20902 v_mad_f32 v3, v3, v4, 0.5 ; D1C10003 03C20903 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 v_cvt_u32_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E020EF9 06060501 v_cvt_u32_f32_e32 v2, v2 ; 7E040F02 v_cvt_u32_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E060EF9 06060503 v_or_b32_e32 v0, v0, v1 ; 28000300 v_or_b32_e32 v1, v2, v3 ; 28020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 768 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 1D, FLOAT DCL TEMP[0..1] 0: F2I TEMP[1], IN[0] 1: TXF_LZ TEMP[0], TEMP[1], SAMP[0], 1D 2: MOV OUT[0], TEMP[0] 3: END radeonsi: Compiling shader 20 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %10 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #3 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #3 %26 = fptosi float %25 to i32 %27 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !invariant.load !0 %29 = call nsz <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %26, <8 x i32> %28, i32 15, i1 false, i1 false, i1 false, i1 false) #3 %30 = extractelement <4 x float> %29, i32 0 %31 = extractelement <4 x float> %29, i32 1 %32 = extractelement <4 x float> %29, i32 2 %33 = extractelement <4 x float> %29, i32 3 %34 = bitcast float %3 to i32 %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %34, 6 %36 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %35, float %30, 7 %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %36, float %31, 8 %38 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %37, float %32, 9 %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 10 %40 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } !0 = !{} SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v8, attr0.x ; D4000008 v_interp_p2_f32 v0, v9, attr0.x ; D4010009 s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 s_waitcnt lgkmcnt(0) ; BF8C007F image_load v[0:3], v0, s[8:15] dmask:0xf unorm ; F0001F00 00020000 s_waitcnt vmcnt(0) ; BF8C0F70 Shader epilog disassembly: v_cvt_pkrtz_f16_f32 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32 v1, v2, v3 ; D2960001 00020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** FRAG DCL IN[0].xy, GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 1D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..1] DCL TEMP[0..510], LOCAL IMM[0] FLT32 { 0.0000, -0.5000, -1.0000, -3.0000} IMM[1] FLT32 { 0.3088, 1.0000, 0.0078, 0.9922} IMM[2] FLT32 { 1.0000, -3.0000, 2.0000, -2.0000} IMM[3] FLT32 { -1.0000, -2.0000, 0.0000, 3.0000} IMM[4] FLT32 { 1.0000, -1.0000, 2.0000, 4.0000} IMM[5] FLT32 { 1.0000, 0.0000, 2.0000, 4.0000} IMM[6] FLT32 { 3.0000, 1.0000, 2.0000, 0.0000} 0: MOV TEMP[0], IMM[0].xxxx 1: MAD TEMP[1].xy, IN[0].xyyy, CONST[0].xyyy, IMM[0].yyyy 2: FRC TEMP[2].xy, TEMP[1].xyyy 3: MUL TEMP[3].xy, TEMP[2].xyyy, CONST[1].xyyy 4: ADD TEMP[4].xy, IN[0].xyyy, -TEMP[3].xyyy 5: MOV TEMP[5].x, IMM[0].xxxx 6: ADD TEMP[6].xy, IMM[0].zwww, -TEMP[2].xyyy 7: DP2 TEMP[7].x, TEMP[6].xyyy, TEMP[6].xyyy 8: SQRT TEMP[8].x, TEMP[7].xxxx 9: MUL TEMP[9].x, TEMP[8].xxxx, IMM[1].xxxx 10: FSLT TEMP[10].x, TEMP[9].xxxx, IMM[1].yyyy 11: UIF TEMP[10].xxxx 12: LRP TEMP[11].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 13: MOV TEMP[12].x, TEMP[11].xxxx 14: TEX TEMP[13].x, TEMP[12], SAMP[0], 1D 15: MOV TEMP[5].x, TEMP[13].xxxx 16: MAD TEMP[14].xy, CONST[1].xyyy, IMM[0].zwww, TEMP[4].xyyy 17: MOV TEMP[15].xy, TEMP[14].xyyy 18: TEX TEMP[16], TEMP[15], SAMP[1], 2D 19: MUL TEMP[0], TEMP[13].xxxx, TEMP[16] 20: ENDIF 21: ADD TEMP[17].xy, IMM[0].xwww, -TEMP[2].xyyy 22: DP2 TEMP[18].x, TEMP[17].xyyy, TEMP[17].xyyy 23: SQRT TEMP[19].x, TEMP[18].xxxx 24: MUL TEMP[9].x, TEMP[19].xxxx, IMM[1].xxxx 25: FSLT TEMP[20].x, TEMP[9].xxxx, IMM[1].yyyy 26: UIF TEMP[20].xxxx 27: LRP TEMP[21].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 28: MOV TEMP[22].x, TEMP[21].xxxx 29: TEX TEMP[23].x, TEMP[22], SAMP[0], 1D 30: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[23].xxxx 31: MAD TEMP[24].xy, CONST[1].xyyy, IMM[0].xwww, TEMP[4].xyyy 32: MOV TEMP[25].xy, TEMP[24].xyyy 33: TEX TEMP[26], TEMP[25], SAMP[1], 2D 34: MAD TEMP[0], TEMP[23].xxxx, TEMP[26], TEMP[0] 35: ENDIF 36: ADD TEMP[27].xy, IMM[2].xyyy, -TEMP[2].xyyy 37: DP2 TEMP[28].x, TEMP[27].xyyy, TEMP[27].xyyy 38: SQRT TEMP[29].x, TEMP[28].xxxx 39: MUL TEMP[9].x, TEMP[29].xxxx, IMM[1].xxxx 40: FSLT TEMP[30].x, TEMP[9].xxxx, IMM[1].yyyy 41: UIF TEMP[30].xxxx 42: LRP TEMP[31].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 43: MOV TEMP[32].x, TEMP[31].xxxx 44: TEX TEMP[33].x, TEMP[32], SAMP[0], 1D 45: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[33].xxxx 46: MAD TEMP[34].xy, CONST[1].xyyy, IMM[2].xyyy, TEMP[4].xyyy 47: MOV TEMP[35].xy, TEMP[34].xyyy 48: TEX TEMP[36], TEMP[35], SAMP[1], 2D 49: MAD TEMP[0], TEMP[33].xxxx, TEMP[36], TEMP[0] 50: ENDIF 51: ADD TEMP[37].xy, IMM[2].zyyy, -TEMP[2].xyyy 52: DP2 TEMP[38].x, TEMP[37].xyyy, TEMP[37].xyyy 53: SQRT TEMP[39].x, TEMP[38].xxxx 54: MUL TEMP[9].x, TEMP[39].xxxx, IMM[1].xxxx 55: FSLT TEMP[40].x, TEMP[9].xxxx, IMM[1].yyyy 56: UIF TEMP[40].xxxx 57: LRP TEMP[41].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 58: MOV TEMP[42].x, TEMP[41].xxxx 59: TEX TEMP[43].x, TEMP[42], SAMP[0], 1D 60: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[43].xxxx 61: MAD TEMP[44].xy, CONST[1].xyyy, IMM[2].zyyy, TEMP[4].xyyy 62: MOV TEMP[45].xy, TEMP[44].xyyy 63: TEX TEMP[46], TEMP[45], SAMP[1], 2D 64: MAD TEMP[0], TEMP[43].xxxx, TEMP[46], TEMP[0] 65: ENDIF 66: ADD TEMP[47].xy, IMM[2].wwww, -TEMP[2].xyyy 67: DP2 TEMP[48].x, TEMP[47].xyyy, TEMP[47].xyyy 68: SQRT TEMP[49].x, TEMP[48].xxxx 69: MUL TEMP[9].x, TEMP[49].xxxx, IMM[1].xxxx 70: FSLT TEMP[50].x, TEMP[9].xxxx, IMM[1].yyyy 71: UIF TEMP[50].xxxx 72: LRP TEMP[51].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 73: MOV TEMP[52].x, TEMP[51].xxxx 74: TEX TEMP[53].x, TEMP[52], SAMP[0], 1D 75: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[53].xxxx 76: MAD TEMP[54].xy, CONST[1].xyyy, IMM[2].wwww, TEMP[4].xyyy 77: MOV TEMP[55].xy, TEMP[54].xyyy 78: TEX TEMP[56], TEMP[55], SAMP[1], 2D 79: MAD TEMP[0], TEMP[53].xxxx, TEMP[56], TEMP[0] 80: ENDIF 81: ADD TEMP[57].xy, IMM[3].xyyy, -TEMP[2].xyyy 82: DP2 TEMP[58].x, TEMP[57].xyyy, TEMP[57].xyyy 83: SQRT TEMP[59].x, TEMP[58].xxxx 84: MUL TEMP[9].x, TEMP[59].xxxx, IMM[1].xxxx 85: FSLT TEMP[60].x, TEMP[9].xxxx, IMM[1].yyyy 86: UIF TEMP[60].xxxx 87: LRP TEMP[61].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 88: MOV TEMP[62].x, TEMP[61].xxxx 89: TEX TEMP[63].x, TEMP[62], SAMP[0], 1D 90: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[63].xxxx 91: MAD TEMP[64].xy, CONST[1].xyyy, IMM[3].xyyy, TEMP[4].xyyy 92: MOV TEMP[65].xy, TEMP[64].xyyy 93: TEX TEMP[66], TEMP[65], SAMP[1], 2D 94: MAD TEMP[0], TEMP[63].xxxx, TEMP[66], TEMP[0] 95: ENDIF 96: ADD TEMP[67].xy, IMM[3].zyyy, -TEMP[2].xyyy 97: DP2 TEMP[68].x, TEMP[67].xyyy, TEMP[67].xyyy 98: SQRT TEMP[69].x, TEMP[68].xxxx 99: MUL TEMP[9].x, TEMP[69].xxxx, IMM[1].xxxx 100: FSLT TEMP[70].x, TEMP[9].xxxx, IMM[1].yyyy 101: UIF TEMP[70].xxxx 102: LRP TEMP[71].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 103: MOV TEMP[72].x, TEMP[71].xxxx 104: TEX TEMP[73].x, TEMP[72], SAMP[0], 1D 105: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[73].xxxx 106: MAD TEMP[74].xy, CONST[1].xyyy, IMM[3].zyyy, TEMP[4].xyyy 107: MOV TEMP[75].xy, TEMP[74].xyyy 108: TEX TEMP[76], TEMP[75], SAMP[1], 2D 109: MAD TEMP[0], TEMP[73].xxxx, TEMP[76], TEMP[0] 110: ENDIF 111: ADD TEMP[77].xy, IMM[2].xwww, -TEMP[2].xyyy 112: DP2 TEMP[78].x, TEMP[77].xyyy, TEMP[77].xyyy 113: SQRT TEMP[79].x, TEMP[78].xxxx 114: MUL TEMP[9].x, TEMP[79].xxxx, IMM[1].xxxx 115: FSLT TEMP[80].x, TEMP[9].xxxx, IMM[1].yyyy 116: UIF TEMP[80].xxxx 117: LRP TEMP[81].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 118: MOV TEMP[82].x, TEMP[81].xxxx 119: TEX TEMP[83].x, TEMP[82], SAMP[0], 1D 120: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[83].xxxx 121: MAD TEMP[84].xy, CONST[1].xyyy, IMM[2].xwww, TEMP[4].xyyy 122: MOV TEMP[85].xy, TEMP[84].xyyy 123: TEX TEMP[86], TEMP[85], SAMP[1], 2D 124: MAD TEMP[0], TEMP[83].xxxx, TEMP[86], TEMP[0] 125: ENDIF 126: ADD TEMP[87].xy, IMM[2].zwww, -TEMP[2].xyyy 127: DP2 TEMP[88].x, TEMP[87].xyyy, TEMP[87].xyyy 128: SQRT TEMP[89].x, TEMP[88].xxxx 129: MUL TEMP[9].x, TEMP[89].xxxx, IMM[1].xxxx 130: FSLT TEMP[90].x, TEMP[9].xxxx, IMM[1].yyyy 131: UIF TEMP[90].xxxx 132: LRP TEMP[91].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 133: MOV TEMP[92].x, TEMP[91].xxxx 134: TEX TEMP[93].x, TEMP[92], SAMP[0], 1D 135: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[93].xxxx 136: MAD TEMP[94].xy, CONST[1].xyyy, IMM[2].zwww, TEMP[4].xyyy 137: MOV TEMP[95].xy, TEMP[94].xyyy 138: TEX TEMP[96], TEMP[95], SAMP[1], 2D 139: MAD TEMP[0], TEMP[93].xxxx, TEMP[96], TEMP[0] 140: ENDIF 141: ADD TEMP[97].xy, IMM[3].wyyy, -TEMP[2].xyyy 142: DP2 TEMP[98].x, TEMP[97].xyyy, TEMP[97].xyyy 143: SQRT TEMP[99].x, TEMP[98].xxxx 144: MUL TEMP[9].x, TEMP[99].xxxx, IMM[1].xxxx 145: FSLT TEMP[100].x, TEMP[9].xxxx, IMM[1].yyyy 146: UIF TEMP[100].xxxx 147: LRP TEMP[101].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 148: MOV TEMP[102].x, TEMP[101].xxxx 149: TEX TEMP[103].x, TEMP[102], SAMP[0], 1D 150: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[103].xxxx 151: MAD TEMP[104].xy, CONST[1].xyyy, IMM[3].wyyy, TEMP[4].xyyy 152: MOV TEMP[105].xy, TEMP[104].xyyy 153: TEX TEMP[106], TEMP[105], SAMP[1], 2D 154: MAD TEMP[0], TEMP[103].xxxx, TEMP[106], TEMP[0] 155: ENDIF 156: ADD TEMP[107].xy, IMM[0].wzzz, -TEMP[2].xyyy 157: DP2 TEMP[108].x, TEMP[107].xyyy, TEMP[107].xyyy 158: SQRT TEMP[109].x, TEMP[108].xxxx 159: MUL TEMP[9].x, TEMP[109].xxxx, IMM[1].xxxx 160: FSLT TEMP[110].x, TEMP[9].xxxx, IMM[1].yyyy 161: UIF TEMP[110].xxxx 162: LRP TEMP[111].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 163: MOV TEMP[112].x, TEMP[111].xxxx 164: TEX TEMP[113].x, TEMP[112], SAMP[0], 1D 165: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[113].xxxx 166: MAD TEMP[114].xy, CONST[1].xyyy, IMM[0].wzzz, TEMP[4].xyyy 167: MOV TEMP[115].xy, TEMP[114].xyyy 168: TEX TEMP[116], TEMP[115], SAMP[1], 2D 169: MAD TEMP[0], TEMP[113].xxxx, TEMP[116], TEMP[0] 170: ENDIF 171: ADD TEMP[117].xy, IMM[3].yxxx, -TEMP[2].xyyy 172: DP2 TEMP[118].x, TEMP[117].xyyy, TEMP[117].xyyy 173: SQRT TEMP[119].x, TEMP[118].xxxx 174: MUL TEMP[9].x, TEMP[119].xxxx, IMM[1].xxxx 175: FSLT TEMP[120].x, TEMP[9].xxxx, IMM[1].yyyy 176: UIF TEMP[120].xxxx 177: LRP TEMP[121].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 178: MOV TEMP[122].x, TEMP[121].xxxx 179: TEX TEMP[123].x, TEMP[122], SAMP[0], 1D 180: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[123].xxxx 181: MAD TEMP[124].xy, CONST[1].xyyy, IMM[3].yxxx, TEMP[4].xyyy 182: MOV TEMP[125].xy, TEMP[124].xyyy 183: TEX TEMP[126], TEMP[125], SAMP[1], 2D 184: MAD TEMP[0], TEMP[123].xxxx, TEMP[126], TEMP[0] 185: ENDIF 186: ADD TEMP[127].xy, IMM[0].zzzz, -TEMP[2].xyyy 187: DP2 TEMP[128].x, TEMP[127].xyyy, TEMP[127].xyyy 188: SQRT TEMP[129].x, TEMP[128].xxxx 189: MUL TEMP[9].x, TEMP[129].xxxx, IMM[1].xxxx 190: LRP TEMP[130].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 191: MOV TEMP[131].x, TEMP[130].xxxx 192: TEX TEMP[132].x, TEMP[131], SAMP[0], 1D 193: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[132].xxxx 194: ADD TEMP[133].xy, TEMP[4].xyyy, -CONST[1].xyyy 195: MOV TEMP[134].xy, TEMP[133].xyyy 196: TEX TEMP[135], TEMP[134], SAMP[1], 2D 197: MAD TEMP[0], TEMP[132].xxxx, TEMP[135], TEMP[0] 198: ADD TEMP[136].xy, IMM[0].xzzz, -TEMP[2].xyyy 199: DP2 TEMP[137].x, TEMP[136].xyyy, TEMP[136].xyyy 200: SQRT TEMP[138].x, TEMP[137].xxxx 201: MUL TEMP[9].x, TEMP[138].xxxx, IMM[1].xxxx 202: LRP TEMP[139].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 203: MOV TEMP[140].x, TEMP[139].xxxx 204: TEX TEMP[141].x, TEMP[140], SAMP[0], 1D 205: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[141].xxxx 206: MAD TEMP[142].xy, CONST[1].xyyy, IMM[0].xzzz, TEMP[4].xyyy 207: MOV TEMP[143].xy, TEMP[142].xyyy 208: TEX TEMP[144], TEMP[143], SAMP[1], 2D 209: MAD TEMP[0], TEMP[141].xxxx, TEMP[144], TEMP[0] 210: ADD TEMP[145].xy, IMM[4].xyyy, -TEMP[2].xyyy 211: DP2 TEMP[146].x, TEMP[145].xyyy, TEMP[145].xyyy 212: SQRT TEMP[147].x, TEMP[146].xxxx 213: MUL TEMP[9].x, TEMP[147].xxxx, IMM[1].xxxx 214: LRP TEMP[148].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 215: MOV TEMP[149].x, TEMP[148].xxxx 216: TEX TEMP[150].x, TEMP[149], SAMP[0], 1D 217: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[150].xxxx 218: MAD TEMP[151].xy, CONST[1].xyyy, IMM[4].xyyy, TEMP[4].xyyy 219: MOV TEMP[152].xy, TEMP[151].xyyy 220: TEX TEMP[153], TEMP[152], SAMP[1], 2D 221: MAD TEMP[0], TEMP[150].xxxx, TEMP[153], TEMP[0] 222: ADD TEMP[154].xy, IMM[4].zyyy, -TEMP[2].xyyy 223: DP2 TEMP[155].x, TEMP[154].xyyy, TEMP[154].xyyy 224: SQRT TEMP[156].x, TEMP[155].xxxx 225: MUL TEMP[9].x, TEMP[156].xxxx, IMM[1].xxxx 226: LRP TEMP[157].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 227: MOV TEMP[158].x, TEMP[157].xxxx 228: TEX TEMP[159].x, TEMP[158], SAMP[0], 1D 229: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[159].xxxx 230: MAD TEMP[160].xy, CONST[1].xyyy, IMM[4].zyyy, TEMP[4].xyyy 231: MOV TEMP[161].xy, TEMP[160].xyyy 232: TEX TEMP[162], TEMP[161], SAMP[1], 2D 233: MAD TEMP[0], TEMP[159].xxxx, TEMP[162], TEMP[0] 234: ADD TEMP[163].xy, IMM[3].wxxx, -TEMP[2].xyyy 235: DP2 TEMP[164].x, TEMP[163].xyyy, TEMP[163].xyyy 236: SQRT TEMP[165].x, TEMP[164].xxxx 237: MUL TEMP[9].x, TEMP[165].xxxx, IMM[1].xxxx 238: FSLT TEMP[166].x, TEMP[9].xxxx, IMM[1].yyyy 239: UIF TEMP[166].xxxx 240: LRP TEMP[167].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 241: MOV TEMP[168].x, TEMP[167].xxxx 242: TEX TEMP[169].x, TEMP[168], SAMP[0], 1D 243: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[169].xxxx 244: MAD TEMP[170].xy, CONST[1].xyyy, IMM[3].wxxx, TEMP[4].xyyy 245: MOV TEMP[171].xy, TEMP[170].xyyy 246: TEX TEMP[172], TEMP[171], SAMP[1], 2D 247: MAD TEMP[0], TEMP[169].xxxx, TEMP[172], TEMP[0] 248: ENDIF 249: ADD TEMP[173].xy, IMM[4].wyyy, -TEMP[2].xyyy 250: DP2 TEMP[174].x, TEMP[173].xyyy, TEMP[173].xyyy 251: SQRT TEMP[175].x, TEMP[174].xxxx 252: MUL TEMP[9].x, TEMP[175].xxxx, IMM[1].xxxx 253: FSLT TEMP[176].x, TEMP[9].xxxx, IMM[1].yyyy 254: UIF TEMP[176].xxxx 255: LRP TEMP[177].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 256: MOV TEMP[178].x, TEMP[177].xxxx 257: TEX TEMP[179].x, TEMP[178], SAMP[0], 1D 258: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[179].xxxx 259: MAD TEMP[180].xy, CONST[1].xyyy, IMM[4].wyyy, TEMP[4].xyyy 260: MOV TEMP[181].xy, TEMP[180].xyyy 261: TEX TEMP[182], TEMP[181], SAMP[1], 2D 262: MAD TEMP[0], TEMP[179].xxxx, TEMP[182], TEMP[0] 263: ENDIF 264: ADD TEMP[183].xy, IMM[0].wxxx, -TEMP[2].xyyy 265: DP2 TEMP[184].x, TEMP[183].xyyy, TEMP[183].xyyy 266: SQRT TEMP[185].x, TEMP[184].xxxx 267: MUL TEMP[9].x, TEMP[185].xxxx, IMM[1].xxxx 268: FSLT TEMP[186].x, TEMP[9].xxxx, IMM[1].yyyy 269: UIF TEMP[186].xxxx 270: LRP TEMP[187].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 271: MOV TEMP[188].x, TEMP[187].xxxx 272: TEX TEMP[189].x, TEMP[188], SAMP[0], 1D 273: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[189].xxxx 274: MAD TEMP[190].xy, CONST[1].xyyy, IMM[0].wxxx, TEMP[4].xyyy 275: MOV TEMP[191].xy, TEMP[190].xyyy 276: TEX TEMP[192], TEMP[191], SAMP[1], 2D 277: MAD TEMP[0], TEMP[189].xxxx, TEMP[192], TEMP[0] 278: ENDIF 279: ADD TEMP[193].xy, IMM[3].yzzz, -TEMP[2].xyyy 280: DP2 TEMP[194].x, TEMP[193].xyyy, TEMP[193].xyyy 281: SQRT TEMP[195].x, TEMP[194].xxxx 282: MUL TEMP[9].x, TEMP[195].xxxx, IMM[1].xxxx 283: FSLT TEMP[196].x, TEMP[9].xxxx, IMM[1].yyyy 284: UIF TEMP[196].xxxx 285: LRP TEMP[197].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 286: MOV TEMP[198].x, TEMP[197].xxxx 287: TEX TEMP[199].x, TEMP[198], SAMP[0], 1D 288: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[199].xxxx 289: MAD TEMP[200].xy, CONST[1].xyyy, IMM[3].yzzz, TEMP[4].xyyy 290: MOV TEMP[201].xy, TEMP[200].xyyy 291: TEX TEMP[202], TEMP[201], SAMP[1], 2D 292: MAD TEMP[0], TEMP[199].xxxx, TEMP[202], TEMP[0] 293: ENDIF 294: ADD TEMP[203].xy, IMM[0].zxxx, -TEMP[2].xyyy 295: DP2 TEMP[204].x, TEMP[203].xyyy, TEMP[203].xyyy 296: SQRT TEMP[205].x, TEMP[204].xxxx 297: MUL TEMP[9].x, TEMP[205].xxxx, IMM[1].xxxx 298: LRP TEMP[206].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 299: MOV TEMP[207].x, TEMP[206].xxxx 300: TEX TEMP[208].x, TEMP[207], SAMP[0], 1D 301: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[208].xxxx 302: MAD TEMP[209].xy, CONST[1].xyyy, IMM[0].zxxx, TEMP[4].xyyy 303: MOV TEMP[210].xy, TEMP[209].xyyy 304: TEX TEMP[211], TEMP[210], SAMP[1], 2D 305: MAD TEMP[0], TEMP[208].xxxx, TEMP[211], TEMP[0] 306: MOV TEMP[212].xy, -TEMP[2].xyxx 307: DP2 TEMP[213].x, TEMP[212].xyyy, TEMP[212].xyyy 308: SQRT TEMP[214].x, TEMP[213].xxxx 309: MUL TEMP[9].x, TEMP[214].xxxx, IMM[1].xxxx 310: LRP TEMP[215].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 311: MOV TEMP[216].x, TEMP[215].xxxx 312: TEX TEMP[217].x, TEMP[216], SAMP[0], 1D 313: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[217].xxxx 314: MOV TEMP[218].xy, TEMP[4].xyyy 315: TEX TEMP[219], TEMP[218], SAMP[1], 2D 316: MAD TEMP[0], TEMP[217].xxxx, TEMP[219], TEMP[0] 317: ADD TEMP[220].xy, IMM[5].xyyy, -TEMP[2].xyyy 318: DP2 TEMP[221].x, TEMP[220].xyyy, TEMP[220].xyyy 319: SQRT TEMP[222].x, TEMP[221].xxxx 320: MUL TEMP[9].x, TEMP[222].xxxx, IMM[1].xxxx 321: LRP TEMP[223].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 322: MOV TEMP[224].x, TEMP[223].xxxx 323: TEX TEMP[225].x, TEMP[224], SAMP[0], 1D 324: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[225].xxxx 325: MAD TEMP[226].xy, CONST[1].xyyy, IMM[5].xyyy, TEMP[4].xyyy 326: MOV TEMP[227].xy, TEMP[226].xyyy 327: TEX TEMP[228], TEMP[227], SAMP[1], 2D 328: MAD TEMP[0], TEMP[225].xxxx, TEMP[228], TEMP[0] 329: ADD TEMP[229].xy, IMM[5].zyyy, -TEMP[2].xyyy 330: DP2 TEMP[230].x, TEMP[229].xyyy, TEMP[229].xyyy 331: SQRT TEMP[231].x, TEMP[230].xxxx 332: MUL TEMP[9].x, TEMP[231].xxxx, IMM[1].xxxx 333: LRP TEMP[232].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 334: MOV TEMP[233].x, TEMP[232].xxxx 335: TEX TEMP[234].x, TEMP[233], SAMP[0], 1D 336: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[234].xxxx 337: MAD TEMP[235].xy, CONST[1].xyyy, IMM[5].zyyy, TEMP[4].xyyy 338: MOV TEMP[236].xy, TEMP[235].xyyy 339: TEX TEMP[237], TEMP[236], SAMP[1], 2D 340: MAD TEMP[0], TEMP[234].xxxx, TEMP[237], TEMP[0] 341: ADD TEMP[238].xy, IMM[3].wzzz, -TEMP[2].xyyy 342: DP2 TEMP[239].x, TEMP[238].xyyy, TEMP[238].xyyy 343: SQRT TEMP[240].x, TEMP[239].xxxx 344: MUL TEMP[9].x, TEMP[240].xxxx, IMM[1].xxxx 345: FSLT TEMP[241].x, TEMP[9].xxxx, IMM[1].yyyy 346: UIF TEMP[241].xxxx 347: LRP TEMP[242].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 348: MOV TEMP[243].x, TEMP[242].xxxx 349: TEX TEMP[244].x, TEMP[243], SAMP[0], 1D 350: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[244].xxxx 351: MAD TEMP[245].xy, CONST[1].xyyy, IMM[3].wzzz, TEMP[4].xyyy 352: MOV TEMP[246].xy, TEMP[245].xyyy 353: TEX TEMP[247], TEMP[246], SAMP[1], 2D 354: MAD TEMP[0], TEMP[244].xxxx, TEMP[247], TEMP[0] 355: ENDIF 356: ADD TEMP[248].xy, IMM[5].wyyy, -TEMP[2].xyyy 357: DP2 TEMP[249].x, TEMP[248].xyyy, TEMP[248].xyyy 358: SQRT TEMP[250].x, TEMP[249].xxxx 359: MUL TEMP[9].x, TEMP[250].xxxx, IMM[1].xxxx 360: FSLT TEMP[251].x, TEMP[9].xxxx, IMM[1].yyyy 361: UIF TEMP[251].xxxx 362: LRP TEMP[252].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 363: MOV TEMP[253].x, TEMP[252].xxxx 364: TEX TEMP[254].x, TEMP[253], SAMP[0], 1D 365: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[254].xxxx 366: MAD TEMP[255].xy, CONST[1].xyyy, IMM[5].wyyy, TEMP[4].xyyy 367: MOV TEMP[256].xy, TEMP[255].xyyy 368: TEX TEMP[257], TEMP[256], SAMP[1], 2D 369: MAD TEMP[0], TEMP[254].xxxx, TEMP[257], TEMP[0] 370: ENDIF 371: ADD TEMP[258].xy, IMM[2].yxxx, -TEMP[2].xyyy 372: DP2 TEMP[259].x, TEMP[258].xyyy, TEMP[258].xyyy 373: SQRT TEMP[260].x, TEMP[259].xxxx 374: MUL TEMP[9].x, TEMP[260].xxxx, IMM[1].xxxx 375: FSLT TEMP[261].x, TEMP[9].xxxx, IMM[1].yyyy 376: UIF TEMP[261].xxxx 377: LRP TEMP[262].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 378: MOV TEMP[263].x, TEMP[262].xxxx 379: TEX TEMP[264].x, TEMP[263], SAMP[0], 1D 380: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[264].xxxx 381: MAD TEMP[265].xy, CONST[1].xyyy, IMM[2].yxxx, TEMP[4].xyyy 382: MOV TEMP[266].xy, TEMP[265].xyyy 383: TEX TEMP[267], TEMP[266], SAMP[1], 2D 384: MAD TEMP[0], TEMP[264].xxxx, TEMP[267], TEMP[0] 385: ENDIF 386: ADD TEMP[268].xy, IMM[2].wxxx, -TEMP[2].xyyy 387: DP2 TEMP[269].x, TEMP[268].xyyy, TEMP[268].xyyy 388: SQRT TEMP[270].x, TEMP[269].xxxx 389: MUL TEMP[9].x, TEMP[270].xxxx, IMM[1].xxxx 390: FSLT TEMP[271].x, TEMP[9].xxxx, IMM[1].yyyy 391: UIF TEMP[271].xxxx 392: LRP TEMP[272].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 393: MOV TEMP[273].x, TEMP[272].xxxx 394: TEX TEMP[274].x, TEMP[273], SAMP[0], 1D 395: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[274].xxxx 396: MAD TEMP[275].xy, CONST[1].xyyy, IMM[2].wxxx, TEMP[4].xyyy 397: MOV TEMP[276].xy, TEMP[275].xyyy 398: TEX TEMP[277], TEMP[276], SAMP[1], 2D 399: MAD TEMP[0], TEMP[274].xxxx, TEMP[277], TEMP[0] 400: ENDIF 401: ADD TEMP[278].xy, IMM[4].yxxx, -TEMP[2].xyyy 402: DP2 TEMP[279].x, TEMP[278].xyyy, TEMP[278].xyyy 403: SQRT TEMP[280].x, TEMP[279].xxxx 404: MUL TEMP[9].x, TEMP[280].xxxx, IMM[1].xxxx 405: LRP TEMP[281].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 406: MOV TEMP[282].x, TEMP[281].xxxx 407: TEX TEMP[283].x, TEMP[282], SAMP[0], 1D 408: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[283].xxxx 409: MAD TEMP[284].xy, CONST[1].xyyy, IMM[4].yxxx, TEMP[4].xyyy 410: MOV TEMP[285].xy, TEMP[284].xyyy 411: TEX TEMP[286], TEMP[285], SAMP[1], 2D 412: MAD TEMP[0], TEMP[283].xxxx, TEMP[286], TEMP[0] 413: ADD TEMP[287].xy, IMM[5].yxxx, -TEMP[2].xyyy 414: DP2 TEMP[288].x, TEMP[287].xyyy, TEMP[287].xyyy 415: SQRT TEMP[289].x, TEMP[288].xxxx 416: MUL TEMP[9].x, TEMP[289].xxxx, IMM[1].xxxx 417: LRP TEMP[290].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 418: MOV TEMP[291].x, TEMP[290].xxxx 419: TEX TEMP[292].x, TEMP[291], SAMP[0], 1D 420: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[292].xxxx 421: MAD TEMP[293].xy, CONST[1].xyyy, IMM[5].yxxx, TEMP[4].xyyy 422: MOV TEMP[294].xy, TEMP[293].xyyy 423: TEX TEMP[295], TEMP[294], SAMP[1], 2D 424: MAD TEMP[0], TEMP[292].xxxx, TEMP[295], TEMP[0] 425: ADD TEMP[296].xy, IMM[1].yyyy, -TEMP[2].xyyy 426: DP2 TEMP[297].x, TEMP[296].xyyy, TEMP[296].xyyy 427: SQRT TEMP[298].x, TEMP[297].xxxx 428: MUL TEMP[9].x, TEMP[298].xxxx, IMM[1].xxxx 429: LRP TEMP[299].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 430: MOV TEMP[300].x, TEMP[299].xxxx 431: TEX TEMP[301].x, TEMP[300], SAMP[0], 1D 432: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[301].xxxx 433: ADD TEMP[302].xy, TEMP[4].xyyy, CONST[1].xyyy 434: MOV TEMP[303].xy, TEMP[302].xyyy 435: TEX TEMP[304], TEMP[303], SAMP[1], 2D 436: MAD TEMP[0], TEMP[301].xxxx, TEMP[304], TEMP[0] 437: ADD TEMP[305].xy, IMM[2].zxxx, -TEMP[2].xyyy 438: DP2 TEMP[306].x, TEMP[305].xyyy, TEMP[305].xyyy 439: SQRT TEMP[307].x, TEMP[306].xxxx 440: MUL TEMP[9].x, TEMP[307].xxxx, IMM[1].xxxx 441: LRP TEMP[308].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 442: MOV TEMP[309].x, TEMP[308].xxxx 443: TEX TEMP[310].x, TEMP[309], SAMP[0], 1D 444: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[310].xxxx 445: MAD TEMP[311].xy, CONST[1].xyyy, IMM[2].zxxx, TEMP[4].xyyy 446: MOV TEMP[312].xy, TEMP[311].xyyy 447: TEX TEMP[313], TEMP[312], SAMP[1], 2D 448: MAD TEMP[0], TEMP[310].xxxx, TEMP[313], TEMP[0] 449: ADD TEMP[314].xy, IMM[6].xyyy, -TEMP[2].xyyy 450: DP2 TEMP[315].x, TEMP[314].xyyy, TEMP[314].xyyy 451: SQRT TEMP[316].x, TEMP[315].xxxx 452: MUL TEMP[9].x, TEMP[316].xxxx, IMM[1].xxxx 453: FSLT TEMP[317].x, TEMP[9].xxxx, IMM[1].yyyy 454: UIF TEMP[317].xxxx 455: LRP TEMP[318].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 456: MOV TEMP[319].x, TEMP[318].xxxx 457: TEX TEMP[320].x, TEMP[319], SAMP[0], 1D 458: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[320].xxxx 459: MAD TEMP[321].xy, CONST[1].xyyy, IMM[6].xyyy, TEMP[4].xyyy 460: MOV TEMP[322].xy, TEMP[321].xyyy 461: TEX TEMP[323], TEMP[322], SAMP[1], 2D 462: MAD TEMP[0], TEMP[320].xxxx, TEMP[323], TEMP[0] 463: ENDIF 464: ADD TEMP[324].xy, IMM[4].wxxx, -TEMP[2].xyyy 465: DP2 TEMP[325].x, TEMP[324].xyyy, TEMP[324].xyyy 466: SQRT TEMP[326].x, TEMP[325].xxxx 467: MUL TEMP[9].x, TEMP[326].xxxx, IMM[1].xxxx 468: FSLT TEMP[327].x, TEMP[9].xxxx, IMM[1].yyyy 469: UIF TEMP[327].xxxx 470: LRP TEMP[328].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 471: MOV TEMP[329].x, TEMP[328].xxxx 472: TEX TEMP[330].x, TEMP[329], SAMP[0], 1D 473: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[330].xxxx 474: MAD TEMP[331].xy, CONST[1].xyyy, IMM[4].wxxx, TEMP[4].xyyy 475: MOV TEMP[332].xy, TEMP[331].xyyy 476: TEX TEMP[333], TEMP[332], SAMP[1], 2D 477: MAD TEMP[0], TEMP[330].xxxx, TEMP[333], TEMP[0] 478: ENDIF 479: ADD TEMP[334].xy, IMM[2].yzzz, -TEMP[2].xyyy 480: DP2 TEMP[335].x, TEMP[334].xyyy, TEMP[334].xyyy 481: SQRT TEMP[336].x, TEMP[335].xxxx 482: MUL TEMP[9].x, TEMP[336].xxxx, IMM[1].xxxx 483: FSLT TEMP[337].x, TEMP[9].xxxx, IMM[1].yyyy 484: UIF TEMP[337].xxxx 485: LRP TEMP[338].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 486: MOV TEMP[339].x, TEMP[338].xxxx 487: TEX TEMP[340].x, TEMP[339], SAMP[0], 1D 488: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[340].xxxx 489: MAD TEMP[341].xy, CONST[1].xyyy, IMM[2].yzzz, TEMP[4].xyyy 490: MOV TEMP[342].xy, TEMP[341].xyyy 491: TEX TEMP[343], TEMP[342], SAMP[1], 2D 492: MAD TEMP[0], TEMP[340].xxxx, TEMP[343], TEMP[0] 493: ENDIF 494: ADD TEMP[344].xy, IMM[2].wzzz, -TEMP[2].xyyy 495: DP2 TEMP[345].x, TEMP[344].xyyy, TEMP[344].xyyy 496: SQRT TEMP[346].x, TEMP[345].xxxx 497: MUL TEMP[9].x, TEMP[346].xxxx, IMM[1].xxxx 498: FSLT TEMP[347].x, TEMP[9].xxxx, IMM[1].yyyy 499: UIF TEMP[347].xxxx 500: LRP TEMP[348].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 501: MOV TEMP[349].x, TEMP[348].xxxx 502: TEX TEMP[350].x, TEMP[349], SAMP[0], 1D 503: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[350].xxxx 504: MAD TEMP[351].xy, CONST[1].xyyy, IMM[2].wzzz, TEMP[4].xyyy 505: MOV TEMP[352].xy, TEMP[351].xyyy 506: TEX TEMP[353], TEMP[352], SAMP[1], 2D 507: MAD TEMP[0], TEMP[350].xxxx, TEMP[353], TEMP[0] 508: ENDIF 509: ADD TEMP[354].xy, IMM[4].yzzz, -TEMP[2].xyyy 510: DP2 TEMP[355].x, TEMP[354].xyyy, TEMP[354].xyyy 511: SQRT TEMP[356].x, TEMP[355].xxxx 512: MUL TEMP[9].x, TEMP[356].xxxx, IMM[1].xxxx 513: LRP TEMP[357].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 514: MOV TEMP[358].x, TEMP[357].xxxx 515: TEX TEMP[359].x, TEMP[358], SAMP[0], 1D 516: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[359].xxxx 517: MAD TEMP[360].xy, CONST[1].xyyy, IMM[4].yzzz, TEMP[4].xyyy 518: MOV TEMP[361].xy, TEMP[360].xyyy 519: TEX TEMP[362], TEMP[361], SAMP[1], 2D 520: MAD TEMP[0], TEMP[359].xxxx, TEMP[362], TEMP[0] 521: ADD TEMP[363].xy, IMM[5].yzzz, -TEMP[2].xyyy 522: DP2 TEMP[364].x, TEMP[363].xyyy, TEMP[363].xyyy 523: SQRT TEMP[365].x, TEMP[364].xxxx 524: MUL TEMP[9].x, TEMP[365].xxxx, IMM[1].xxxx 525: LRP TEMP[366].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 526: MOV TEMP[367].x, TEMP[366].xxxx 527: TEX TEMP[368].x, TEMP[367], SAMP[0], 1D 528: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[368].xxxx 529: MAD TEMP[369].xy, CONST[1].xyyy, IMM[5].yzzz, TEMP[4].xyyy 530: MOV TEMP[370].xy, TEMP[369].xyyy 531: TEX TEMP[371], TEMP[370], SAMP[1], 2D 532: MAD TEMP[0], TEMP[368].xxxx, TEMP[371], TEMP[0] 533: ADD TEMP[372].xy, IMM[2].xzzz, -TEMP[2].xyyy 534: DP2 TEMP[373].x, TEMP[372].xyyy, TEMP[372].xyyy 535: SQRT TEMP[374].x, TEMP[373].xxxx 536: MUL TEMP[9].x, TEMP[374].xxxx, IMM[1].xxxx 537: LRP TEMP[375].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 538: MOV TEMP[376].x, TEMP[375].xxxx 539: TEX TEMP[377].x, TEMP[376], SAMP[0], 1D 540: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[377].xxxx 541: MAD TEMP[378].xy, CONST[1].xyyy, IMM[2].xzzz, TEMP[4].xyyy 542: MOV TEMP[379].xy, TEMP[378].xyyy 543: TEX TEMP[380], TEMP[379], SAMP[1], 2D 544: MAD TEMP[0], TEMP[377].xxxx, TEMP[380], TEMP[0] 545: ADD TEMP[381].xy, IMM[2].zzzz, -TEMP[2].xyyy 546: DP2 TEMP[382].x, TEMP[381].xyyy, TEMP[381].xyyy 547: SQRT TEMP[383].x, TEMP[382].xxxx 548: MUL TEMP[9].x, TEMP[383].xxxx, IMM[1].xxxx 549: LRP TEMP[384].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 550: MOV TEMP[385].x, TEMP[384].xxxx 551: TEX TEMP[386].x, TEMP[385], SAMP[0], 1D 552: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[386].xxxx 553: MAD TEMP[387].xy, CONST[1].xyyy, IMM[2].zzzz, TEMP[4].xyyy 554: MOV TEMP[388].xy, TEMP[387].xyyy 555: TEX TEMP[389], TEMP[388], SAMP[1], 2D 556: MAD TEMP[0], TEMP[386].xxxx, TEMP[389], TEMP[0] 557: ADD TEMP[390].xy, IMM[6].xzzz, -TEMP[2].xyyy 558: DP2 TEMP[391].x, TEMP[390].xyyy, TEMP[390].xyyy 559: SQRT TEMP[392].x, TEMP[391].xxxx 560: MUL TEMP[9].x, TEMP[392].xxxx, IMM[1].xxxx 561: FSLT TEMP[393].x, TEMP[9].xxxx, IMM[1].yyyy 562: UIF TEMP[393].xxxx 563: LRP TEMP[394].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 564: MOV TEMP[395].x, TEMP[394].xxxx 565: TEX TEMP[396].x, TEMP[395], SAMP[0], 1D 566: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[396].xxxx 567: MAD TEMP[397].xy, CONST[1].xyyy, IMM[6].xzzz, TEMP[4].xyyy 568: MOV TEMP[398].xy, TEMP[397].xyyy 569: TEX TEMP[399], TEMP[398], SAMP[1], 2D 570: MAD TEMP[0], TEMP[396].xxxx, TEMP[399], TEMP[0] 571: ENDIF 572: ADD TEMP[400].xy, IMM[4].wzzz, -TEMP[2].xyyy 573: DP2 TEMP[401].x, TEMP[400].xyyy, TEMP[400].xyyy 574: SQRT TEMP[402].x, TEMP[401].xxxx 575: MUL TEMP[9].x, TEMP[402].xxxx, IMM[1].xxxx 576: FSLT TEMP[403].x, TEMP[9].xxxx, IMM[1].yyyy 577: UIF TEMP[403].xxxx 578: LRP TEMP[404].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 579: MOV TEMP[405].x, TEMP[404].xxxx 580: TEX TEMP[406].x, TEMP[405], SAMP[0], 1D 581: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[406].xxxx 582: MAD TEMP[407].xy, CONST[1].xyyy, IMM[4].wzzz, TEMP[4].xyyy 583: MOV TEMP[408].xy, TEMP[407].xyyy 584: TEX TEMP[409], TEMP[408], SAMP[1], 2D 585: MAD TEMP[0], TEMP[406].xxxx, TEMP[409], TEMP[0] 586: ENDIF 587: ADD TEMP[410].xy, IMM[3].ywww, -TEMP[2].xyyy 588: DP2 TEMP[411].x, TEMP[410].xyyy, TEMP[410].xyyy 589: SQRT TEMP[412].x, TEMP[411].xxxx 590: MUL TEMP[9].x, TEMP[412].xxxx, IMM[1].xxxx 591: FSLT TEMP[413].x, TEMP[9].xxxx, IMM[1].yyyy 592: UIF TEMP[413].xxxx 593: LRP TEMP[414].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 594: MOV TEMP[415].x, TEMP[414].xxxx 595: TEX TEMP[416].x, TEMP[415], SAMP[0], 1D 596: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[416].xxxx 597: MAD TEMP[417].xy, CONST[1].xyyy, IMM[3].ywww, TEMP[4].xyyy 598: MOV TEMP[418].xy, TEMP[417].xyyy 599: TEX TEMP[419], TEMP[418], SAMP[1], 2D 600: MAD TEMP[0], TEMP[416].xxxx, TEMP[419], TEMP[0] 601: ENDIF 602: ADD TEMP[420].xy, IMM[3].xwww, -TEMP[2].xyyy 603: DP2 TEMP[421].x, TEMP[420].xyyy, TEMP[420].xyyy 604: SQRT TEMP[422].x, TEMP[421].xxxx 605: MUL TEMP[9].x, TEMP[422].xxxx, IMM[1].xxxx 606: FSLT TEMP[423].x, TEMP[9].xxxx, IMM[1].yyyy 607: UIF TEMP[423].xxxx 608: LRP TEMP[424].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 609: MOV TEMP[425].x, TEMP[424].xxxx 610: TEX TEMP[426].x, TEMP[425], SAMP[0], 1D 611: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[426].xxxx 612: MAD TEMP[427].xy, CONST[1].xyyy, IMM[3].xwww, TEMP[4].xyyy 613: MOV TEMP[428].xy, TEMP[427].xyyy 614: TEX TEMP[429], TEMP[428], SAMP[1], 2D 615: MAD TEMP[0], TEMP[426].xxxx, TEMP[429], TEMP[0] 616: ENDIF 617: ADD TEMP[430].xy, IMM[3].zwww, -TEMP[2].xyyy 618: DP2 TEMP[431].x, TEMP[430].xyyy, TEMP[430].xyyy 619: SQRT TEMP[432].x, TEMP[431].xxxx 620: MUL TEMP[9].x, TEMP[432].xxxx, IMM[1].xxxx 621: FSLT TEMP[433].x, TEMP[9].xxxx, IMM[1].yyyy 622: UIF TEMP[433].xxxx 623: LRP TEMP[434].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 624: MOV TEMP[435].x, TEMP[434].xxxx 625: TEX TEMP[436].x, TEMP[435], SAMP[0], 1D 626: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[436].xxxx 627: MAD TEMP[437].xy, CONST[1].xyyy, IMM[3].zwww, TEMP[4].xyyy 628: MOV TEMP[438].xy, TEMP[437].xyyy 629: TEX TEMP[439], TEMP[438], SAMP[1], 2D 630: MAD TEMP[0], TEMP[436].xxxx, TEMP[439], TEMP[0] 631: ENDIF 632: ADD TEMP[440].xy, IMM[6].yxxx, -TEMP[2].xyyy 633: DP2 TEMP[441].x, TEMP[440].xyyy, TEMP[440].xyyy 634: SQRT TEMP[442].x, TEMP[441].xxxx 635: MUL TEMP[9].x, TEMP[442].xxxx, IMM[1].xxxx 636: FSLT TEMP[443].x, TEMP[9].xxxx, IMM[1].yyyy 637: UIF TEMP[443].xxxx 638: LRP TEMP[444].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 639: MOV TEMP[445].x, TEMP[444].xxxx 640: TEX TEMP[446].x, TEMP[445], SAMP[0], 1D 641: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[446].xxxx 642: MAD TEMP[447].xy, CONST[1].xyyy, IMM[6].yxxx, TEMP[4].xyyy 643: MOV TEMP[448].xy, TEMP[447].xyyy 644: TEX TEMP[449], TEMP[448], SAMP[1], 2D 645: MAD TEMP[0], TEMP[446].xxxx, TEMP[449], TEMP[0] 646: ENDIF 647: ADD TEMP[450].xy, IMM[6].zxxx, -TEMP[2].xyyy 648: DP2 TEMP[451].x, TEMP[450].xyyy, TEMP[450].xyyy 649: SQRT TEMP[452].x, TEMP[451].xxxx 650: MUL TEMP[9].x, TEMP[452].xxxx, IMM[1].xxxx 651: FSLT TEMP[453].x, TEMP[9].xxxx, IMM[1].yyyy 652: UIF TEMP[453].xxxx 653: LRP TEMP[454].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 654: MOV TEMP[455].x, TEMP[454].xxxx 655: TEX TEMP[456].x, TEMP[455], SAMP[0], 1D 656: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[456].xxxx 657: MAD TEMP[457].xy, CONST[1].xyyy, IMM[6].zxxx, TEMP[4].xyyy 658: MOV TEMP[458].xy, TEMP[457].xyyy 659: TEX TEMP[459], TEMP[458], SAMP[1], 2D 660: MAD TEMP[0], TEMP[456].xxxx, TEMP[459], TEMP[0] 661: ENDIF 662: ADD TEMP[460].xy, IMM[3].wwww, -TEMP[2].xyyy 663: DP2 TEMP[461].x, TEMP[460].xyyy, TEMP[460].xyyy 664: SQRT TEMP[462].x, TEMP[461].xxxx 665: MUL TEMP[9].x, TEMP[462].xxxx, IMM[1].xxxx 666: FSLT TEMP[463].x, TEMP[9].xxxx, IMM[1].yyyy 667: UIF TEMP[463].xxxx 668: LRP TEMP[464].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 669: MOV TEMP[465].x, TEMP[464].xxxx 670: TEX TEMP[466].x, TEMP[465], SAMP[0], 1D 671: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[466].xxxx 672: MAD TEMP[467].xy, CONST[1].xyyy, IMM[3].wwww, TEMP[4].xyyy 673: MOV TEMP[468].xy, TEMP[467].xyyy 674: TEX TEMP[469], TEMP[468], SAMP[1], 2D 675: MAD TEMP[0], TEMP[466].xxxx, TEMP[469], TEMP[0] 676: ENDIF 677: ADD TEMP[470].xy, IMM[4].ywww, -TEMP[2].xyyy 678: DP2 TEMP[471].x, TEMP[470].xyyy, TEMP[470].xyyy 679: SQRT TEMP[472].x, TEMP[471].xxxx 680: MUL TEMP[9].x, TEMP[472].xxxx, IMM[1].xxxx 681: FSLT TEMP[473].x, TEMP[9].xxxx, IMM[1].yyyy 682: UIF TEMP[473].xxxx 683: LRP TEMP[474].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 684: MOV TEMP[475].x, TEMP[474].xxxx 685: TEX TEMP[476].x, TEMP[475], SAMP[0], 1D 686: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[476].xxxx 687: MAD TEMP[477].xy, CONST[1].xyyy, IMM[4].ywww, TEMP[4].xyyy 688: MOV TEMP[478].xy, TEMP[477].xyyy 689: TEX TEMP[479], TEMP[478], SAMP[1], 2D 690: MAD TEMP[0], TEMP[476].xxxx, TEMP[479], TEMP[0] 691: ENDIF 692: ADD TEMP[480].xy, IMM[5].ywww, -TEMP[2].xyyy 693: DP2 TEMP[481].x, TEMP[480].xyyy, TEMP[480].xyyy 694: SQRT TEMP[482].x, TEMP[481].xxxx 695: MUL TEMP[9].x, TEMP[482].xxxx, IMM[1].xxxx 696: FSLT TEMP[483].x, TEMP[9].xxxx, IMM[1].yyyy 697: UIF TEMP[483].xxxx 698: LRP TEMP[484].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 699: MOV TEMP[485].x, TEMP[484].xxxx 700: TEX TEMP[486].x, TEMP[485], SAMP[0], 1D 701: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[486].xxxx 702: MAD TEMP[487].xy, CONST[1].xyyy, IMM[5].ywww, TEMP[4].xyyy 703: MOV TEMP[488].xy, TEMP[487].xyyy 704: TEX TEMP[489], TEMP[488], SAMP[1], 2D 705: MAD TEMP[0], TEMP[486].xxxx, TEMP[489], TEMP[0] 706: ENDIF 707: ADD TEMP[490].xy, IMM[4].xwww, -TEMP[2].xyyy 708: DP2 TEMP[491].x, TEMP[490].xyyy, TEMP[490].xyyy 709: SQRT TEMP[492].x, TEMP[491].xxxx 710: MUL TEMP[9].x, TEMP[492].xxxx, IMM[1].xxxx 711: FSLT TEMP[493].x, TEMP[9].xxxx, IMM[1].yyyy 712: UIF TEMP[493].xxxx 713: LRP TEMP[494].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 714: MOV TEMP[495].x, TEMP[494].xxxx 715: TEX TEMP[496].x, TEMP[495], SAMP[0], 1D 716: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[496].xxxx 717: MAD TEMP[497].xy, CONST[1].xyyy, IMM[4].xwww, TEMP[4].xyyy 718: MOV TEMP[498].xy, TEMP[497].xyyy 719: TEX TEMP[499], TEMP[498], SAMP[1], 2D 720: MAD TEMP[0], TEMP[496].xxxx, TEMP[499], TEMP[0] 721: ENDIF 722: ADD TEMP[500].xy, IMM[4].zwww, -TEMP[2].xyyy 723: DP2 TEMP[501].x, TEMP[500].xyyy, TEMP[500].xyyy 724: SQRT TEMP[502].x, TEMP[501].xxxx 725: MUL TEMP[9].x, TEMP[502].xxxx, IMM[1].xxxx 726: FSLT TEMP[503].x, TEMP[9].xxxx, IMM[1].yyyy 727: UIF TEMP[503].xxxx 728: LRP TEMP[504].x, TEMP[9].xxxx, IMM[1].wwww, IMM[1].zzzz 729: MOV TEMP[505].x, TEMP[504].xxxx 730: TEX TEMP[506].x, TEMP[505], SAMP[0], 1D 731: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[506].xxxx 732: MAD TEMP[507].xy, CONST[1].xyyy, IMM[4].zwww, TEMP[4].xyyy 733: MOV TEMP[508].xy, TEMP[507].xyyy 734: TEX TEMP[509], TEMP[508], SAMP[1], 2D 735: MAD TEMP[0], TEMP[506].xxxx, TEMP[509], TEMP[0] 736: ENDIF 737: RCP TEMP[510], TEMP[5].xxxx 738: MUL TEMP[0].xy, TEMP[0], TEMP[510] 739: MOV TEMP[0].xy, TEMP[0].xyxx 740: MOV TEMP[0].z, IMM[0].xxxx 741: MOV TEMP[0].w, IMM[1].yyyy 742: MOV OUT[0], TEMP[0] 743: END radeonsi: Compiling shader 21 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %6 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #2 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #2 %26 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0 %28 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %27, i32 0) %29 = fmul nsz float %25, %28 %30 = fadd nsz float %29, -5.000000e-01 %31 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 0, i32 %4) #2 %32 = call nsz float @llvm.amdgcn.interp.p2(float %31, float %23, i32 1, i32 0, i32 %4) #2 %33 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %27, i32 4) %34 = fmul nsz float %32, %33 %35 = fadd nsz float %34, -5.000000e-01 %36 = call nsz float @llvm.floor.f32(float %30) #2 %37 = fsub nsz float %30, %36 %38 = call nsz float @llvm.floor.f32(float %35) #2 %39 = fsub nsz float %35, %38 %40 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %27, i32 16) %41 = fmul nsz float %37, %40 %42 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %27, i32 20) %43 = fmul nsz float %39, %42 %44 = fsub nsz float %25, %41 %45 = fsub nsz float %32, %43 %46 = fsub nsz float -1.000000e+00, %37 %47 = fsub nsz float -3.000000e+00, %39 %48 = fmul nsz float %46, %46 %49 = fmul nsz float %47, %47 %50 = fadd nsz float %48, %49 %51 = call nsz float @llvm.sqrt.f32(float %50) #2 %52 = fmul nsz float %51, 0x3FD3C36BE0000000 %53 = fcmp nsz olt float %52, 1.000000e+00 br i1 %53, label %if11, label %endif20 if11: ; preds = %main_body %54 = fsub nsz float 1.000000e+00, %52 %55 = fmul nsz float %52, 0x3FEFC00000000000 %56 = fmul nsz float %54, 7.812500e-03 %57 = fadd nsz float %55, %56 %58 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %59 = load <8 x i32>, <8 x i32> addrspace(2)* %58, align 32, !invariant.load !0 %60 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %61 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %60, i64 0, i64 35, !amdgpu.uniform !0 %62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !invariant.load !0 %63 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %57, <8 x i32> %59, <4 x i32> %62, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %64 = fsub nsz float %44, %40 %65 = fmul nsz float %42, -3.000000e+00 %66 = fadd nsz float %65, %45 %67 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !invariant.load !0 %69 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %60, i64 0, i64 39, !amdgpu.uniform !0 %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !invariant.load !0 %71 = bitcast float %64 to i32 %72 = bitcast float %66 to i32 %73 = insertelement <2 x i32> undef, i32 %71, i32 0 %74 = insertelement <2 x i32> %73, i32 %72, i32 1 %75 = bitcast <2 x i32> %74 to <2 x float> %76 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %75, <8 x i32> %68, <4 x i32> %70, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = fmul nsz float %63, %77 %80 = fmul nsz float %63, %78 br label %endif20 endif20: ; preds = %if11, %main_body %TEMP5.x.0 = phi float [ %63, %if11 ], [ 0.000000e+00, %main_body ] %TEMP0.y.0 = phi float [ %80, %if11 ], [ 0.000000e+00, %main_body ] %TEMP0.x.0 = phi float [ %79, %if11 ], [ 0.000000e+00, %main_body ] %81 = fmul nsz float %37, %37 %82 = fadd nsz float %81, %49 %83 = call nsz float @llvm.sqrt.f32(float %82) #2 %84 = fmul nsz float %83, 0x3FD3C36BE0000000 %85 = fcmp nsz olt float %84, 1.000000e+00 br i1 %85, label %if26, label %endif35 if26: ; preds = %endif20 %86 = fsub nsz float 1.000000e+00, %84 %87 = fmul nsz float %84, 0x3FEFC00000000000 %88 = fmul nsz float %86, 7.812500e-03 %89 = fadd nsz float %87, %88 %90 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %91 = load <8 x i32>, <8 x i32> addrspace(2)* %90, align 32, !invariant.load !0 %92 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %93 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %92, i64 0, i64 35, !amdgpu.uniform !0 %94 = load <4 x i32>, <4 x i32> addrspace(2)* %93, align 16, !invariant.load !0 %95 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %89, <8 x i32> %91, <4 x i32> %94, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %96 = fadd nsz float %TEMP5.x.0, %95 %97 = fmul nsz float %40, 0.000000e+00 %98 = fadd nsz float %97, %44 %99 = fmul nsz float %42, -3.000000e+00 %100 = fadd nsz float %99, %45 %101 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %102 = load <8 x i32>, <8 x i32> addrspace(2)* %101, align 32, !invariant.load !0 %103 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %92, i64 0, i64 39, !amdgpu.uniform !0 %104 = load <4 x i32>, <4 x i32> addrspace(2)* %103, align 16, !invariant.load !0 %105 = bitcast float %98 to i32 %106 = bitcast float %100 to i32 %107 = insertelement <2 x i32> undef, i32 %105, i32 0 %108 = insertelement <2 x i32> %107, i32 %106, i32 1 %109 = bitcast <2 x i32> %108 to <2 x float> %110 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %109, <8 x i32> %102, <4 x i32> %104, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = fmul nsz float %95, %111 %114 = fadd nsz float %113, %TEMP0.x.0 %115 = fmul nsz float %95, %112 %116 = fadd nsz float %115, %TEMP0.y.0 br label %endif35 endif35: ; preds = %if26, %endif20 %TEMP5.x.1 = phi float [ %96, %if26 ], [ %TEMP5.x.0, %endif20 ] %TEMP0.y.1 = phi float [ %116, %if26 ], [ %TEMP0.y.0, %endif20 ] %TEMP0.x.1 = phi float [ %114, %if26 ], [ %TEMP0.x.0, %endif20 ] %117 = fsub nsz float 1.000000e+00, %37 %118 = fmul nsz float %117, %117 %119 = fadd nsz float %118, %49 %120 = call nsz float @llvm.sqrt.f32(float %119) #2 %121 = fmul nsz float %120, 0x3FD3C36BE0000000 %122 = fcmp nsz olt float %121, 1.000000e+00 br i1 %122, label %if41, label %endif50 if41: ; preds = %endif35 %123 = fsub nsz float 1.000000e+00, %121 %124 = fmul nsz float %121, 0x3FEFC00000000000 %125 = fmul nsz float %123, 7.812500e-03 %126 = fadd nsz float %124, %125 %127 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %128 = load <8 x i32>, <8 x i32> addrspace(2)* %127, align 32, !invariant.load !0 %129 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %130 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %129, i64 0, i64 35, !amdgpu.uniform !0 %131 = load <4 x i32>, <4 x i32> addrspace(2)* %130, align 16, !invariant.load !0 %132 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %126, <8 x i32> %128, <4 x i32> %131, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %133 = fadd nsz float %TEMP5.x.1, %132 %134 = fadd nsz float %40, %44 %135 = fmul nsz float %42, -3.000000e+00 %136 = fadd nsz float %135, %45 %137 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %138 = load <8 x i32>, <8 x i32> addrspace(2)* %137, align 32, !invariant.load !0 %139 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %129, i64 0, i64 39, !amdgpu.uniform !0 %140 = load <4 x i32>, <4 x i32> addrspace(2)* %139, align 16, !invariant.load !0 %141 = bitcast float %134 to i32 %142 = bitcast float %136 to i32 %143 = insertelement <2 x i32> undef, i32 %141, i32 0 %144 = insertelement <2 x i32> %143, i32 %142, i32 1 %145 = bitcast <2 x i32> %144 to <2 x float> %146 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %145, <8 x i32> %138, <4 x i32> %140, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %147 = extractelement <4 x float> %146, i32 0 %148 = extractelement <4 x float> %146, i32 1 %149 = fmul nsz float %132, %147 %150 = fadd nsz float %149, %TEMP0.x.1 %151 = fmul nsz float %132, %148 %152 = fadd nsz float %151, %TEMP0.y.1 br label %endif50 endif50: ; preds = %if41, %endif35 %TEMP5.x.2 = phi float [ %133, %if41 ], [ %TEMP5.x.1, %endif35 ] %TEMP0.y.2 = phi float [ %152, %if41 ], [ %TEMP0.y.1, %endif35 ] %TEMP0.x.2 = phi float [ %150, %if41 ], [ %TEMP0.x.1, %endif35 ] %153 = fsub nsz float 2.000000e+00, %37 %154 = fmul nsz float %153, %153 %155 = fadd nsz float %154, %49 %156 = call nsz float @llvm.sqrt.f32(float %155) #2 %157 = fmul nsz float %156, 0x3FD3C36BE0000000 %158 = fcmp nsz olt float %157, 1.000000e+00 br i1 %158, label %if56, label %endif65 if56: ; preds = %endif50 %159 = fsub nsz float 1.000000e+00, %157 %160 = fmul nsz float %157, 0x3FEFC00000000000 %161 = fmul nsz float %159, 7.812500e-03 %162 = fadd nsz float %160, %161 %163 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %164 = load <8 x i32>, <8 x i32> addrspace(2)* %163, align 32, !invariant.load !0 %165 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %166 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %165, i64 0, i64 35, !amdgpu.uniform !0 %167 = load <4 x i32>, <4 x i32> addrspace(2)* %166, align 16, !invariant.load !0 %168 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %162, <8 x i32> %164, <4 x i32> %167, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %169 = fadd nsz float %TEMP5.x.2, %168 %170 = fmul nsz float %40, 2.000000e+00 %171 = fadd nsz float %170, %44 %172 = fmul nsz float %42, -3.000000e+00 %173 = fadd nsz float %172, %45 %174 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %175 = load <8 x i32>, <8 x i32> addrspace(2)* %174, align 32, !invariant.load !0 %176 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %165, i64 0, i64 39, !amdgpu.uniform !0 %177 = load <4 x i32>, <4 x i32> addrspace(2)* %176, align 16, !invariant.load !0 %178 = bitcast float %171 to i32 %179 = bitcast float %173 to i32 %180 = insertelement <2 x i32> undef, i32 %178, i32 0 %181 = insertelement <2 x i32> %180, i32 %179, i32 1 %182 = bitcast <2 x i32> %181 to <2 x float> %183 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %182, <8 x i32> %175, <4 x i32> %177, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %184 = extractelement <4 x float> %183, i32 0 %185 = extractelement <4 x float> %183, i32 1 %186 = fmul nsz float %168, %184 %187 = fadd nsz float %186, %TEMP0.x.2 %188 = fmul nsz float %168, %185 %189 = fadd nsz float %188, %TEMP0.y.2 br label %endif65 endif65: ; preds = %if56, %endif50 %TEMP5.x.3 = phi float [ %169, %if56 ], [ %TEMP5.x.2, %endif50 ] %TEMP0.y.3 = phi float [ %189, %if56 ], [ %TEMP0.y.2, %endif50 ] %TEMP0.x.3 = phi float [ %187, %if56 ], [ %TEMP0.x.2, %endif50 ] %190 = fsub nsz float -2.000000e+00, %37 %191 = fsub nsz float -2.000000e+00, %39 %192 = fmul nsz float %190, %190 %193 = fmul nsz float %191, %191 %194 = fadd nsz float %192, %193 %195 = call nsz float @llvm.sqrt.f32(float %194) #2 %196 = fmul nsz float %195, 0x3FD3C36BE0000000 %197 = fcmp nsz olt float %196, 1.000000e+00 br i1 %197, label %if71, label %endif80 if71: ; preds = %endif65 %198 = fsub nsz float 1.000000e+00, %196 %199 = fmul nsz float %196, 0x3FEFC00000000000 %200 = fmul nsz float %198, 7.812500e-03 %201 = fadd nsz float %199, %200 %202 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %203 = load <8 x i32>, <8 x i32> addrspace(2)* %202, align 32, !invariant.load !0 %204 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %205 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %204, i64 0, i64 35, !amdgpu.uniform !0 %206 = load <4 x i32>, <4 x i32> addrspace(2)* %205, align 16, !invariant.load !0 %207 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %201, <8 x i32> %203, <4 x i32> %206, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %208 = fadd nsz float %TEMP5.x.3, %207 %209 = fmul nsz float %40, -2.000000e+00 %210 = fadd nsz float %209, %44 %211 = fmul nsz float %42, -2.000000e+00 %212 = fadd nsz float %211, %45 %213 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %214 = load <8 x i32>, <8 x i32> addrspace(2)* %213, align 32, !invariant.load !0 %215 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %204, i64 0, i64 39, !amdgpu.uniform !0 %216 = load <4 x i32>, <4 x i32> addrspace(2)* %215, align 16, !invariant.load !0 %217 = bitcast float %210 to i32 %218 = bitcast float %212 to i32 %219 = insertelement <2 x i32> undef, i32 %217, i32 0 %220 = insertelement <2 x i32> %219, i32 %218, i32 1 %221 = bitcast <2 x i32> %220 to <2 x float> %222 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %221, <8 x i32> %214, <4 x i32> %216, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %223 = extractelement <4 x float> %222, i32 0 %224 = extractelement <4 x float> %222, i32 1 %225 = fmul nsz float %207, %223 %226 = fadd nsz float %225, %TEMP0.x.3 %227 = fmul nsz float %207, %224 %228 = fadd nsz float %227, %TEMP0.y.3 br label %endif80 endif80: ; preds = %if71, %endif65 %TEMP5.x.4 = phi float [ %208, %if71 ], [ %TEMP5.x.3, %endif65 ] %TEMP0.y.4 = phi float [ %228, %if71 ], [ %TEMP0.y.3, %endif65 ] %TEMP0.x.4 = phi float [ %226, %if71 ], [ %TEMP0.x.3, %endif65 ] %229 = fadd nsz float %48, %193 %230 = call nsz float @llvm.sqrt.f32(float %229) #2 %231 = fmul nsz float %230, 0x3FD3C36BE0000000 %232 = fcmp nsz olt float %231, 1.000000e+00 br i1 %232, label %if86, label %endif95 if86: ; preds = %endif80 %233 = fsub nsz float 1.000000e+00, %231 %234 = fmul nsz float %231, 0x3FEFC00000000000 %235 = fmul nsz float %233, 7.812500e-03 %236 = fadd nsz float %234, %235 %237 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %238 = load <8 x i32>, <8 x i32> addrspace(2)* %237, align 32, !invariant.load !0 %239 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %240 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %239, i64 0, i64 35, !amdgpu.uniform !0 %241 = load <4 x i32>, <4 x i32> addrspace(2)* %240, align 16, !invariant.load !0 %242 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %236, <8 x i32> %238, <4 x i32> %241, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %243 = fadd nsz float %TEMP5.x.4, %242 %244 = fsub nsz float %44, %40 %245 = fmul nsz float %42, -2.000000e+00 %246 = fadd nsz float %245, %45 %247 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %248 = load <8 x i32>, <8 x i32> addrspace(2)* %247, align 32, !invariant.load !0 %249 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %239, i64 0, i64 39, !amdgpu.uniform !0 %250 = load <4 x i32>, <4 x i32> addrspace(2)* %249, align 16, !invariant.load !0 %251 = bitcast float %244 to i32 %252 = bitcast float %246 to i32 %253 = insertelement <2 x i32> undef, i32 %251, i32 0 %254 = insertelement <2 x i32> %253, i32 %252, i32 1 %255 = bitcast <2 x i32> %254 to <2 x float> %256 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %255, <8 x i32> %248, <4 x i32> %250, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %257 = extractelement <4 x float> %256, i32 0 %258 = extractelement <4 x float> %256, i32 1 %259 = fmul nsz float %242, %257 %260 = fadd nsz float %259, %TEMP0.x.4 %261 = fmul nsz float %242, %258 %262 = fadd nsz float %261, %TEMP0.y.4 br label %endif95 endif95: ; preds = %if86, %endif80 %TEMP5.x.5 = phi float [ %243, %if86 ], [ %TEMP5.x.4, %endif80 ] %TEMP0.y.5 = phi float [ %262, %if86 ], [ %TEMP0.y.4, %endif80 ] %TEMP0.x.5 = phi float [ %260, %if86 ], [ %TEMP0.x.4, %endif80 ] %263 = fadd nsz float %81, %193 %264 = call nsz float @llvm.sqrt.f32(float %263) #2 %265 = fmul nsz float %264, 0x3FD3C36BE0000000 %266 = fcmp nsz olt float %265, 1.000000e+00 br i1 %266, label %if101, label %endif110 if101: ; preds = %endif95 %267 = fsub nsz float 1.000000e+00, %265 %268 = fmul nsz float %265, 0x3FEFC00000000000 %269 = fmul nsz float %267, 7.812500e-03 %270 = fadd nsz float %268, %269 %271 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %272 = load <8 x i32>, <8 x i32> addrspace(2)* %271, align 32, !invariant.load !0 %273 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %274 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %273, i64 0, i64 35, !amdgpu.uniform !0 %275 = load <4 x i32>, <4 x i32> addrspace(2)* %274, align 16, !invariant.load !0 %276 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %270, <8 x i32> %272, <4 x i32> %275, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %277 = fadd nsz float %TEMP5.x.5, %276 %278 = fmul nsz float %40, 0.000000e+00 %279 = fadd nsz float %278, %44 %280 = fmul nsz float %42, -2.000000e+00 %281 = fadd nsz float %280, %45 %282 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %283 = load <8 x i32>, <8 x i32> addrspace(2)* %282, align 32, !invariant.load !0 %284 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %273, i64 0, i64 39, !amdgpu.uniform !0 %285 = load <4 x i32>, <4 x i32> addrspace(2)* %284, align 16, !invariant.load !0 %286 = bitcast float %279 to i32 %287 = bitcast float %281 to i32 %288 = insertelement <2 x i32> undef, i32 %286, i32 0 %289 = insertelement <2 x i32> %288, i32 %287, i32 1 %290 = bitcast <2 x i32> %289 to <2 x float> %291 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %290, <8 x i32> %283, <4 x i32> %285, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %292 = extractelement <4 x float> %291, i32 0 %293 = extractelement <4 x float> %291, i32 1 %294 = fmul nsz float %276, %292 %295 = fadd nsz float %294, %TEMP0.x.5 %296 = fmul nsz float %276, %293 %297 = fadd nsz float %296, %TEMP0.y.5 br label %endif110 endif110: ; preds = %if101, %endif95 %TEMP5.x.6 = phi float [ %277, %if101 ], [ %TEMP5.x.5, %endif95 ] %TEMP0.y.6 = phi float [ %297, %if101 ], [ %TEMP0.y.5, %endif95 ] %TEMP0.x.6 = phi float [ %295, %if101 ], [ %TEMP0.x.5, %endif95 ] %298 = fadd nsz float %118, %193 %299 = call nsz float @llvm.sqrt.f32(float %298) #2 %300 = fmul nsz float %299, 0x3FD3C36BE0000000 %301 = fcmp nsz olt float %300, 1.000000e+00 br i1 %301, label %if116, label %endif125 if116: ; preds = %endif110 %302 = fsub nsz float 1.000000e+00, %300 %303 = fmul nsz float %300, 0x3FEFC00000000000 %304 = fmul nsz float %302, 7.812500e-03 %305 = fadd nsz float %303, %304 %306 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %307 = load <8 x i32>, <8 x i32> addrspace(2)* %306, align 32, !invariant.load !0 %308 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %309 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %308, i64 0, i64 35, !amdgpu.uniform !0 %310 = load <4 x i32>, <4 x i32> addrspace(2)* %309, align 16, !invariant.load !0 %311 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %305, <8 x i32> %307, <4 x i32> %310, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %312 = fadd nsz float %TEMP5.x.6, %311 %313 = fadd nsz float %40, %44 %314 = fmul nsz float %42, -2.000000e+00 %315 = fadd nsz float %314, %45 %316 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %317 = load <8 x i32>, <8 x i32> addrspace(2)* %316, align 32, !invariant.load !0 %318 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %308, i64 0, i64 39, !amdgpu.uniform !0 %319 = load <4 x i32>, <4 x i32> addrspace(2)* %318, align 16, !invariant.load !0 %320 = bitcast float %313 to i32 %321 = bitcast float %315 to i32 %322 = insertelement <2 x i32> undef, i32 %320, i32 0 %323 = insertelement <2 x i32> %322, i32 %321, i32 1 %324 = bitcast <2 x i32> %323 to <2 x float> %325 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %324, <8 x i32> %317, <4 x i32> %319, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %326 = extractelement <4 x float> %325, i32 0 %327 = extractelement <4 x float> %325, i32 1 %328 = fmul nsz float %311, %326 %329 = fadd nsz float %328, %TEMP0.x.6 %330 = fmul nsz float %311, %327 %331 = fadd nsz float %330, %TEMP0.y.6 br label %endif125 endif125: ; preds = %if116, %endif110 %TEMP5.x.7 = phi float [ %312, %if116 ], [ %TEMP5.x.6, %endif110 ] %TEMP0.y.7 = phi float [ %331, %if116 ], [ %TEMP0.y.6, %endif110 ] %TEMP0.x.7 = phi float [ %329, %if116 ], [ %TEMP0.x.6, %endif110 ] %332 = fadd nsz float %154, %193 %333 = call nsz float @llvm.sqrt.f32(float %332) #2 %334 = fmul nsz float %333, 0x3FD3C36BE0000000 %335 = fcmp nsz olt float %334, 1.000000e+00 br i1 %335, label %if131, label %endif140 if131: ; preds = %endif125 %336 = fsub nsz float 1.000000e+00, %334 %337 = fmul nsz float %334, 0x3FEFC00000000000 %338 = fmul nsz float %336, 7.812500e-03 %339 = fadd nsz float %337, %338 %340 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %341 = load <8 x i32>, <8 x i32> addrspace(2)* %340, align 32, !invariant.load !0 %342 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %343 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %342, i64 0, i64 35, !amdgpu.uniform !0 %344 = load <4 x i32>, <4 x i32> addrspace(2)* %343, align 16, !invariant.load !0 %345 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %339, <8 x i32> %341, <4 x i32> %344, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %346 = fadd nsz float %TEMP5.x.7, %345 %347 = fmul nsz float %40, 2.000000e+00 %348 = fadd nsz float %347, %44 %349 = fmul nsz float %42, -2.000000e+00 %350 = fadd nsz float %349, %45 %351 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %352 = load <8 x i32>, <8 x i32> addrspace(2)* %351, align 32, !invariant.load !0 %353 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %342, i64 0, i64 39, !amdgpu.uniform !0 %354 = load <4 x i32>, <4 x i32> addrspace(2)* %353, align 16, !invariant.load !0 %355 = bitcast float %348 to i32 %356 = bitcast float %350 to i32 %357 = insertelement <2 x i32> undef, i32 %355, i32 0 %358 = insertelement <2 x i32> %357, i32 %356, i32 1 %359 = bitcast <2 x i32> %358 to <2 x float> %360 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %359, <8 x i32> %352, <4 x i32> %354, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %361 = extractelement <4 x float> %360, i32 0 %362 = extractelement <4 x float> %360, i32 1 %363 = fmul nsz float %345, %361 %364 = fadd nsz float %363, %TEMP0.x.7 %365 = fmul nsz float %345, %362 %366 = fadd nsz float %365, %TEMP0.y.7 br label %endif140 endif140: ; preds = %if131, %endif125 %TEMP5.x.8 = phi float [ %346, %if131 ], [ %TEMP5.x.7, %endif125 ] %TEMP0.y.8 = phi float [ %366, %if131 ], [ %TEMP0.y.7, %endif125 ] %TEMP0.x.8 = phi float [ %364, %if131 ], [ %TEMP0.x.7, %endif125 ] %367 = fsub nsz float 3.000000e+00, %37 %368 = fmul nsz float %367, %367 %369 = fadd nsz float %368, %193 %370 = call nsz float @llvm.sqrt.f32(float %369) #2 %371 = fmul nsz float %370, 0x3FD3C36BE0000000 %372 = fcmp nsz olt float %371, 1.000000e+00 br i1 %372, label %if146, label %endif155 if146: ; preds = %endif140 %373 = fsub nsz float 1.000000e+00, %371 %374 = fmul nsz float %371, 0x3FEFC00000000000 %375 = fmul nsz float %373, 7.812500e-03 %376 = fadd nsz float %374, %375 %377 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %378 = load <8 x i32>, <8 x i32> addrspace(2)* %377, align 32, !invariant.load !0 %379 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %380 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %379, i64 0, i64 35, !amdgpu.uniform !0 %381 = load <4 x i32>, <4 x i32> addrspace(2)* %380, align 16, !invariant.load !0 %382 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %376, <8 x i32> %378, <4 x i32> %381, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %383 = fadd nsz float %TEMP5.x.8, %382 %384 = fmul nsz float %40, 3.000000e+00 %385 = fadd nsz float %384, %44 %386 = fmul nsz float %42, -2.000000e+00 %387 = fadd nsz float %386, %45 %388 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %389 = load <8 x i32>, <8 x i32> addrspace(2)* %388, align 32, !invariant.load !0 %390 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %379, i64 0, i64 39, !amdgpu.uniform !0 %391 = load <4 x i32>, <4 x i32> addrspace(2)* %390, align 16, !invariant.load !0 %392 = bitcast float %385 to i32 %393 = bitcast float %387 to i32 %394 = insertelement <2 x i32> undef, i32 %392, i32 0 %395 = insertelement <2 x i32> %394, i32 %393, i32 1 %396 = bitcast <2 x i32> %395 to <2 x float> %397 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %396, <8 x i32> %389, <4 x i32> %391, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %398 = extractelement <4 x float> %397, i32 0 %399 = extractelement <4 x float> %397, i32 1 %400 = fmul nsz float %382, %398 %401 = fadd nsz float %400, %TEMP0.x.8 %402 = fmul nsz float %382, %399 %403 = fadd nsz float %402, %TEMP0.y.8 br label %endif155 endif155: ; preds = %if146, %endif140 %TEMP5.x.9 = phi float [ %383, %if146 ], [ %TEMP5.x.8, %endif140 ] %TEMP0.y.9 = phi float [ %403, %if146 ], [ %TEMP0.y.8, %endif140 ] %TEMP0.x.9 = phi float [ %401, %if146 ], [ %TEMP0.x.8, %endif140 ] %404 = fsub nsz float -3.000000e+00, %37 %405 = fsub nsz float -1.000000e+00, %39 %406 = fmul nsz float %404, %404 %407 = fmul nsz float %405, %405 %408 = fadd nsz float %406, %407 %409 = call nsz float @llvm.sqrt.f32(float %408) #2 %410 = fmul nsz float %409, 0x3FD3C36BE0000000 %411 = fcmp nsz olt float %410, 1.000000e+00 br i1 %411, label %if161, label %endif170 if161: ; preds = %endif155 %412 = fsub nsz float 1.000000e+00, %410 %413 = fmul nsz float %410, 0x3FEFC00000000000 %414 = fmul nsz float %412, 7.812500e-03 %415 = fadd nsz float %413, %414 %416 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %417 = load <8 x i32>, <8 x i32> addrspace(2)* %416, align 32, !invariant.load !0 %418 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %419 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %418, i64 0, i64 35, !amdgpu.uniform !0 %420 = load <4 x i32>, <4 x i32> addrspace(2)* %419, align 16, !invariant.load !0 %421 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %415, <8 x i32> %417, <4 x i32> %420, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %422 = fadd nsz float %TEMP5.x.9, %421 %423 = fmul nsz float %40, -3.000000e+00 %424 = fadd nsz float %423, %44 %425 = fsub nsz float %45, %42 %426 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %427 = load <8 x i32>, <8 x i32> addrspace(2)* %426, align 32, !invariant.load !0 %428 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %418, i64 0, i64 39, !amdgpu.uniform !0 %429 = load <4 x i32>, <4 x i32> addrspace(2)* %428, align 16, !invariant.load !0 %430 = bitcast float %424 to i32 %431 = bitcast float %425 to i32 %432 = insertelement <2 x i32> undef, i32 %430, i32 0 %433 = insertelement <2 x i32> %432, i32 %431, i32 1 %434 = bitcast <2 x i32> %433 to <2 x float> %435 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %434, <8 x i32> %427, <4 x i32> %429, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %436 = extractelement <4 x float> %435, i32 0 %437 = extractelement <4 x float> %435, i32 1 %438 = fmul nsz float %421, %436 %439 = fadd nsz float %438, %TEMP0.x.9 %440 = fmul nsz float %421, %437 %441 = fadd nsz float %440, %TEMP0.y.9 br label %endif170 endif170: ; preds = %if161, %endif155 %TEMP5.x.10 = phi float [ %422, %if161 ], [ %TEMP5.x.9, %endif155 ] %TEMP0.y.10 = phi float [ %441, %if161 ], [ %TEMP0.y.9, %endif155 ] %TEMP0.x.10 = phi float [ %439, %if161 ], [ %TEMP0.x.9, %endif155 ] %442 = fadd nsz float %192, %407 %443 = call nsz float @llvm.sqrt.f32(float %442) #2 %444 = fmul nsz float %443, 0x3FD3C36BE0000000 %445 = fcmp nsz olt float %444, 1.000000e+00 br i1 %445, label %if176, label %endif185 if176: ; preds = %endif170 %446 = fsub nsz float 1.000000e+00, %444 %447 = fmul nsz float %444, 0x3FEFC00000000000 %448 = fmul nsz float %446, 7.812500e-03 %449 = fadd nsz float %447, %448 %450 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %451 = load <8 x i32>, <8 x i32> addrspace(2)* %450, align 32, !invariant.load !0 %452 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %453 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %452, i64 0, i64 35, !amdgpu.uniform !0 %454 = load <4 x i32>, <4 x i32> addrspace(2)* %453, align 16, !invariant.load !0 %455 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %449, <8 x i32> %451, <4 x i32> %454, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %456 = fadd nsz float %TEMP5.x.10, %455 %457 = fmul nsz float %40, -2.000000e+00 %458 = fadd nsz float %457, %44 %459 = fsub nsz float %45, %42 %460 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %461 = load <8 x i32>, <8 x i32> addrspace(2)* %460, align 32, !invariant.load !0 %462 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %452, i64 0, i64 39, !amdgpu.uniform !0 %463 = load <4 x i32>, <4 x i32> addrspace(2)* %462, align 16, !invariant.load !0 %464 = bitcast float %458 to i32 %465 = bitcast float %459 to i32 %466 = insertelement <2 x i32> undef, i32 %464, i32 0 %467 = insertelement <2 x i32> %466, i32 %465, i32 1 %468 = bitcast <2 x i32> %467 to <2 x float> %469 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %468, <8 x i32> %461, <4 x i32> %463, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %470 = extractelement <4 x float> %469, i32 0 %471 = extractelement <4 x float> %469, i32 1 %472 = fmul nsz float %455, %470 %473 = fadd nsz float %472, %TEMP0.x.10 %474 = fmul nsz float %455, %471 %475 = fadd nsz float %474, %TEMP0.y.10 br label %endif185 endif185: ; preds = %if176, %endif170 %TEMP5.x.11 = phi float [ %456, %if176 ], [ %TEMP5.x.10, %endif170 ] %TEMP0.y.11 = phi float [ %475, %if176 ], [ %TEMP0.y.10, %endif170 ] %TEMP0.x.11 = phi float [ %473, %if176 ], [ %TEMP0.x.10, %endif170 ] %476 = fadd nsz float %48, %407 %477 = call nsz float @llvm.sqrt.f32(float %476) #2 %478 = fmul nsz float %477, 0x3FD3C36BE0000000 %479 = fsub nsz float 1.000000e+00, %478 %480 = fmul nsz float %478, 0x3FEFC00000000000 %481 = fmul nsz float %479, 7.812500e-03 %482 = fadd nsz float %480, %481 %483 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %484 = load <8 x i32>, <8 x i32> addrspace(2)* %483, align 32, !invariant.load !0 %485 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %486 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %485, i64 0, i64 35, !amdgpu.uniform !0 %487 = load <4 x i32>, <4 x i32> addrspace(2)* %486, align 16, !invariant.load !0 %488 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %482, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %489 = fadd nsz float %TEMP5.x.11, %488 %490 = fsub nsz float %44, %40 %491 = fsub nsz float %45, %42 %492 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %493 = load <8 x i32>, <8 x i32> addrspace(2)* %492, align 32, !invariant.load !0 %494 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %485, i64 0, i64 39, !amdgpu.uniform !0 %495 = load <4 x i32>, <4 x i32> addrspace(2)* %494, align 16, !invariant.load !0 %496 = bitcast float %490 to i32 %497 = bitcast float %491 to i32 %498 = insertelement <2 x i32> undef, i32 %496, i32 0 %499 = insertelement <2 x i32> %498, i32 %497, i32 1 %500 = bitcast <2 x i32> %499 to <2 x float> %501 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %500, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %502 = extractelement <4 x float> %501, i32 0 %503 = extractelement <4 x float> %501, i32 1 %504 = fmul nsz float %488, %502 %505 = fadd nsz float %504, %TEMP0.x.11 %506 = fmul nsz float %488, %503 %507 = fadd nsz float %506, %TEMP0.y.11 %508 = fadd nsz float %81, %407 %509 = call nsz float @llvm.sqrt.f32(float %508) #2 %510 = fmul nsz float %509, 0x3FD3C36BE0000000 %511 = fsub nsz float 1.000000e+00, %510 %512 = fmul nsz float %510, 0x3FEFC00000000000 %513 = fmul nsz float %511, 7.812500e-03 %514 = fadd nsz float %512, %513 %515 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %514, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %516 = fadd nsz float %489, %515 %517 = fmul nsz float %40, 0.000000e+00 %518 = fadd nsz float %517, %44 %519 = fsub nsz float %45, %42 %520 = bitcast float %518 to i32 %521 = bitcast float %519 to i32 %522 = insertelement <2 x i32> undef, i32 %520, i32 0 %523 = insertelement <2 x i32> %522, i32 %521, i32 1 %524 = bitcast <2 x i32> %523 to <2 x float> %525 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %524, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %526 = extractelement <4 x float> %525, i32 0 %527 = extractelement <4 x float> %525, i32 1 %528 = fmul nsz float %515, %526 %529 = fadd nsz float %528, %505 %530 = fmul nsz float %515, %527 %531 = fadd nsz float %530, %507 %532 = fadd nsz float %118, %407 %533 = call nsz float @llvm.sqrt.f32(float %532) #2 %534 = fmul nsz float %533, 0x3FD3C36BE0000000 %535 = fsub nsz float 1.000000e+00, %534 %536 = fmul nsz float %534, 0x3FEFC00000000000 %537 = fmul nsz float %535, 7.812500e-03 %538 = fadd nsz float %536, %537 %539 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %538, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %540 = fadd nsz float %516, %539 %541 = fadd nsz float %40, %44 %542 = bitcast float %541 to i32 %543 = insertelement <2 x i32> undef, i32 %542, i32 0 %544 = insertelement <2 x i32> %543, i32 %521, i32 1 %545 = bitcast <2 x i32> %544 to <2 x float> %546 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %545, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %547 = extractelement <4 x float> %546, i32 0 %548 = extractelement <4 x float> %546, i32 1 %549 = fmul nsz float %539, %547 %550 = fadd nsz float %549, %529 %551 = fmul nsz float %539, %548 %552 = fadd nsz float %551, %531 %553 = fadd nsz float %154, %407 %554 = call nsz float @llvm.sqrt.f32(float %553) #2 %555 = fmul nsz float %554, 0x3FD3C36BE0000000 %556 = fsub nsz float 1.000000e+00, %555 %557 = fmul nsz float %555, 0x3FEFC00000000000 %558 = fmul nsz float %556, 7.812500e-03 %559 = fadd nsz float %557, %558 %560 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %559, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %561 = fadd nsz float %540, %560 %562 = fmul nsz float %40, 2.000000e+00 %563 = fadd nsz float %562, %44 %564 = bitcast float %563 to i32 %565 = insertelement <2 x i32> undef, i32 %564, i32 0 %566 = insertelement <2 x i32> %565, i32 %521, i32 1 %567 = bitcast <2 x i32> %566 to <2 x float> %568 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %567, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %569 = extractelement <4 x float> %568, i32 0 %570 = extractelement <4 x float> %568, i32 1 %571 = fmul nsz float %560, %569 %572 = fadd nsz float %571, %550 %573 = fmul nsz float %560, %570 %574 = fadd nsz float %573, %552 %575 = fadd nsz float %368, %407 %576 = call nsz float @llvm.sqrt.f32(float %575) #2 %577 = fmul nsz float %576, 0x3FD3C36BE0000000 %578 = fcmp nsz olt float %577, 1.000000e+00 br i1 %578, label %if239, label %endif248 if239: ; preds = %endif185 %579 = fsub nsz float 1.000000e+00, %577 %580 = fmul nsz float %577, 0x3FEFC00000000000 %581 = fmul nsz float %579, 7.812500e-03 %582 = fadd nsz float %580, %581 %583 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %582, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %584 = fadd nsz float %561, %583 %585 = fmul nsz float %40, 3.000000e+00 %586 = fadd nsz float %585, %44 %587 = bitcast float %586 to i32 %588 = insertelement <2 x i32> undef, i32 %587, i32 0 %589 = insertelement <2 x i32> %588, i32 %521, i32 1 %590 = bitcast <2 x i32> %589 to <2 x float> %591 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %590, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %592 = extractelement <4 x float> %591, i32 0 %593 = extractelement <4 x float> %591, i32 1 %594 = fmul nsz float %583, %592 %595 = fadd nsz float %594, %572 %596 = fmul nsz float %583, %593 %597 = fadd nsz float %596, %574 br label %endif248 endif248: ; preds = %if239, %endif185 %TEMP5.x.12 = phi float [ %584, %if239 ], [ %561, %endif185 ] %TEMP0.y.12 = phi float [ %597, %if239 ], [ %574, %endif185 ] %TEMP0.x.12 = phi float [ %595, %if239 ], [ %572, %endif185 ] %598 = fsub nsz float 4.000000e+00, %37 %599 = fmul nsz float %598, %598 %600 = fadd nsz float %599, %407 %601 = call nsz float @llvm.sqrt.f32(float %600) #2 %602 = fmul nsz float %601, 0x3FD3C36BE0000000 %603 = fcmp nsz olt float %602, 1.000000e+00 br i1 %603, label %if254, label %endif263 if254: ; preds = %endif248 %604 = fsub nsz float 1.000000e+00, %602 %605 = fmul nsz float %602, 0x3FEFC00000000000 %606 = fmul nsz float %604, 7.812500e-03 %607 = fadd nsz float %605, %606 %608 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %607, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %609 = fadd nsz float %TEMP5.x.12, %608 %610 = fmul nsz float %40, 4.000000e+00 %611 = fadd nsz float %610, %44 %612 = bitcast float %611 to i32 %613 = insertelement <2 x i32> undef, i32 %612, i32 0 %614 = insertelement <2 x i32> %613, i32 %521, i32 1 %615 = bitcast <2 x i32> %614 to <2 x float> %616 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %615, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %617 = extractelement <4 x float> %616, i32 0 %618 = extractelement <4 x float> %616, i32 1 %619 = fmul nsz float %608, %617 %620 = fadd nsz float %619, %TEMP0.x.12 %621 = fmul nsz float %608, %618 %622 = fadd nsz float %621, %TEMP0.y.12 br label %endif263 endif263: ; preds = %if254, %endif248 %TEMP5.x.13 = phi float [ %609, %if254 ], [ %TEMP5.x.12, %endif248 ] %TEMP0.y.13 = phi float [ %622, %if254 ], [ %TEMP0.y.12, %endif248 ] %TEMP0.x.13 = phi float [ %620, %if254 ], [ %TEMP0.x.12, %endif248 ] %623 = fmul nsz float %39, %39 %624 = fadd nsz float %406, %623 %625 = call nsz float @llvm.sqrt.f32(float %624) #2 %626 = fmul nsz float %625, 0x3FD3C36BE0000000 %627 = fcmp nsz olt float %626, 1.000000e+00 br i1 %627, label %if269, label %endif278 if269: ; preds = %endif263 %628 = fsub nsz float 1.000000e+00, %626 %629 = fmul nsz float %626, 0x3FEFC00000000000 %630 = fmul nsz float %628, 7.812500e-03 %631 = fadd nsz float %629, %630 %632 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %631, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %633 = fadd nsz float %TEMP5.x.13, %632 %634 = fmul nsz float %40, -3.000000e+00 %635 = fadd nsz float %634, %44 %636 = fmul nsz float %42, 0.000000e+00 %637 = fadd nsz float %636, %45 %638 = bitcast float %635 to i32 %639 = bitcast float %637 to i32 %640 = insertelement <2 x i32> undef, i32 %638, i32 0 %641 = insertelement <2 x i32> %640, i32 %639, i32 1 %642 = bitcast <2 x i32> %641 to <2 x float> %643 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %642, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %644 = extractelement <4 x float> %643, i32 0 %645 = extractelement <4 x float> %643, i32 1 %646 = fmul nsz float %632, %644 %647 = fadd nsz float %646, %TEMP0.x.13 %648 = fmul nsz float %632, %645 %649 = fadd nsz float %648, %TEMP0.y.13 br label %endif278 endif278: ; preds = %if269, %endif263 %TEMP5.x.14 = phi float [ %633, %if269 ], [ %TEMP5.x.13, %endif263 ] %TEMP0.y.14 = phi float [ %649, %if269 ], [ %TEMP0.y.13, %endif263 ] %TEMP0.x.14 = phi float [ %647, %if269 ], [ %TEMP0.x.13, %endif263 ] %650 = fadd nsz float %192, %623 %651 = call nsz float @llvm.sqrt.f32(float %650) #2 %652 = fmul nsz float %651, 0x3FD3C36BE0000000 %653 = fcmp nsz olt float %652, 1.000000e+00 br i1 %653, label %if284, label %endif293 if284: ; preds = %endif278 %654 = fsub nsz float 1.000000e+00, %652 %655 = fmul nsz float %652, 0x3FEFC00000000000 %656 = fmul nsz float %654, 7.812500e-03 %657 = fadd nsz float %655, %656 %658 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %657, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %659 = fadd nsz float %TEMP5.x.14, %658 %660 = fmul nsz float %40, -2.000000e+00 %661 = fadd nsz float %660, %44 %662 = fmul nsz float %42, 0.000000e+00 %663 = fadd nsz float %662, %45 %664 = bitcast float %661 to i32 %665 = bitcast float %663 to i32 %666 = insertelement <2 x i32> undef, i32 %664, i32 0 %667 = insertelement <2 x i32> %666, i32 %665, i32 1 %668 = bitcast <2 x i32> %667 to <2 x float> %669 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %668, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %670 = extractelement <4 x float> %669, i32 0 %671 = extractelement <4 x float> %669, i32 1 %672 = fmul nsz float %658, %670 %673 = fadd nsz float %672, %TEMP0.x.14 %674 = fmul nsz float %658, %671 %675 = fadd nsz float %674, %TEMP0.y.14 br label %endif293 endif293: ; preds = %if284, %endif278 %TEMP5.x.15 = phi float [ %659, %if284 ], [ %TEMP5.x.14, %endif278 ] %TEMP0.y.15 = phi float [ %675, %if284 ], [ %TEMP0.y.14, %endif278 ] %TEMP0.x.15 = phi float [ %673, %if284 ], [ %TEMP0.x.14, %endif278 ] %676 = fadd nsz float %48, %623 %677 = call nsz float @llvm.sqrt.f32(float %676) #2 %678 = fmul nsz float %677, 0x3FD3C36BE0000000 %679 = fsub nsz float 1.000000e+00, %678 %680 = fmul nsz float %678, 0x3FEFC00000000000 %681 = fmul nsz float %679, 7.812500e-03 %682 = fadd nsz float %680, %681 %683 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %682, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %684 = fadd nsz float %TEMP5.x.15, %683 %685 = fsub nsz float %44, %40 %686 = fmul nsz float %42, 0.000000e+00 %687 = fadd nsz float %686, %45 %688 = bitcast float %685 to i32 %689 = bitcast float %687 to i32 %690 = insertelement <2 x i32> undef, i32 %688, i32 0 %691 = insertelement <2 x i32> %690, i32 %689, i32 1 %692 = bitcast <2 x i32> %691 to <2 x float> %693 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %692, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %694 = extractelement <4 x float> %693, i32 0 %695 = extractelement <4 x float> %693, i32 1 %696 = fmul nsz float %683, %694 %697 = fadd nsz float %696, %TEMP0.x.15 %698 = fmul nsz float %683, %695 %699 = fadd nsz float %698, %TEMP0.y.15 %700 = fadd nsz float %81, %623 %701 = call nsz float @llvm.sqrt.f32(float %700) #2 %702 = fmul nsz float %701, 0x3FD3C36BE0000000 %703 = fsub nsz float 1.000000e+00, %702 %704 = fmul nsz float %702, 0x3FEFC00000000000 %705 = fmul nsz float %703, 7.812500e-03 %706 = fadd nsz float %704, %705 %707 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %706, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %708 = fadd nsz float %684, %707 %709 = bitcast float %44 to i32 %710 = bitcast float %45 to i32 %711 = insertelement <2 x i32> undef, i32 %709, i32 0 %712 = insertelement <2 x i32> %711, i32 %710, i32 1 %713 = bitcast <2 x i32> %712 to <2 x float> %714 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %713, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %715 = extractelement <4 x float> %714, i32 0 %716 = extractelement <4 x float> %714, i32 1 %717 = fmul nsz float %707, %715 %718 = fadd nsz float %717, %697 %719 = fmul nsz float %707, %716 %720 = fadd nsz float %719, %699 %721 = fadd nsz float %118, %623 %722 = call nsz float @llvm.sqrt.f32(float %721) #2 %723 = fmul nsz float %722, 0x3FD3C36BE0000000 %724 = fsub nsz float 1.000000e+00, %723 %725 = fmul nsz float %723, 0x3FEFC00000000000 %726 = fmul nsz float %724, 7.812500e-03 %727 = fadd nsz float %725, %726 %728 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %727, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %729 = fadd nsz float %708, %728 %730 = insertelement <2 x i32> %543, i32 %689, i32 1 %731 = bitcast <2 x i32> %730 to <2 x float> %732 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %731, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %733 = extractelement <4 x float> %732, i32 0 %734 = extractelement <4 x float> %732, i32 1 %735 = fmul nsz float %728, %733 %736 = fadd nsz float %735, %718 %737 = fmul nsz float %728, %734 %738 = fadd nsz float %737, %720 %739 = fadd nsz float %154, %623 %740 = call nsz float @llvm.sqrt.f32(float %739) #2 %741 = fmul nsz float %740, 0x3FD3C36BE0000000 %742 = fsub nsz float 1.000000e+00, %741 %743 = fmul nsz float %741, 0x3FEFC00000000000 %744 = fmul nsz float %742, 7.812500e-03 %745 = fadd nsz float %743, %744 %746 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %745, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %747 = fadd nsz float %729, %746 %748 = insertelement <2 x i32> %565, i32 %689, i32 1 %749 = bitcast <2 x i32> %748 to <2 x float> %750 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %749, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %751 = extractelement <4 x float> %750, i32 0 %752 = extractelement <4 x float> %750, i32 1 %753 = fmul nsz float %746, %751 %754 = fadd nsz float %753, %736 %755 = fmul nsz float %746, %752 %756 = fadd nsz float %755, %738 %757 = fadd nsz float %368, %623 %758 = call nsz float @llvm.sqrt.f32(float %757) #2 %759 = fmul nsz float %758, 0x3FD3C36BE0000000 %760 = fcmp nsz olt float %759, 1.000000e+00 br i1 %760, label %if346, label %endif355 if346: ; preds = %endif293 %761 = fsub nsz float 1.000000e+00, %759 %762 = fmul nsz float %759, 0x3FEFC00000000000 %763 = fmul nsz float %761, 7.812500e-03 %764 = fadd nsz float %762, %763 %765 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %764, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %766 = fadd nsz float %747, %765 %767 = fmul nsz float %40, 3.000000e+00 %768 = fadd nsz float %767, %44 %769 = bitcast float %768 to i32 %770 = insertelement <2 x i32> undef, i32 %769, i32 0 %771 = insertelement <2 x i32> %770, i32 %689, i32 1 %772 = bitcast <2 x i32> %771 to <2 x float> %773 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %772, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %774 = extractelement <4 x float> %773, i32 0 %775 = extractelement <4 x float> %773, i32 1 %776 = fmul nsz float %765, %774 %777 = fadd nsz float %776, %754 %778 = fmul nsz float %765, %775 %779 = fadd nsz float %778, %756 br label %endif355 endif355: ; preds = %if346, %endif293 %TEMP5.x.16 = phi float [ %766, %if346 ], [ %747, %endif293 ] %TEMP0.y.16 = phi float [ %779, %if346 ], [ %756, %endif293 ] %TEMP0.x.16 = phi float [ %777, %if346 ], [ %754, %endif293 ] %780 = fadd nsz float %599, %623 %781 = call nsz float @llvm.sqrt.f32(float %780) #2 %782 = fmul nsz float %781, 0x3FD3C36BE0000000 %783 = fcmp nsz olt float %782, 1.000000e+00 br i1 %783, label %if361, label %endif370 if361: ; preds = %endif355 %784 = fsub nsz float 1.000000e+00, %782 %785 = fmul nsz float %782, 0x3FEFC00000000000 %786 = fmul nsz float %784, 7.812500e-03 %787 = fadd nsz float %785, %786 %788 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %787, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %789 = fadd nsz float %TEMP5.x.16, %788 %790 = fmul nsz float %40, 4.000000e+00 %791 = fadd nsz float %790, %44 %792 = bitcast float %791 to i32 %793 = insertelement <2 x i32> undef, i32 %792, i32 0 %794 = insertelement <2 x i32> %793, i32 %689, i32 1 %795 = bitcast <2 x i32> %794 to <2 x float> %796 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %795, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %797 = extractelement <4 x float> %796, i32 0 %798 = extractelement <4 x float> %796, i32 1 %799 = fmul nsz float %788, %797 %800 = fadd nsz float %799, %TEMP0.x.16 %801 = fmul nsz float %788, %798 %802 = fadd nsz float %801, %TEMP0.y.16 br label %endif370 endif370: ; preds = %if361, %endif355 %TEMP5.x.17 = phi float [ %789, %if361 ], [ %TEMP5.x.16, %endif355 ] %TEMP0.y.17 = phi float [ %802, %if361 ], [ %TEMP0.y.16, %endif355 ] %TEMP0.x.17 = phi float [ %800, %if361 ], [ %TEMP0.x.16, %endif355 ] %803 = fsub nsz float 1.000000e+00, %39 %804 = fmul nsz float %803, %803 %805 = fadd nsz float %406, %804 %806 = call nsz float @llvm.sqrt.f32(float %805) #2 %807 = fmul nsz float %806, 0x3FD3C36BE0000000 %808 = fcmp nsz olt float %807, 1.000000e+00 br i1 %808, label %if376, label %endif385 if376: ; preds = %endif370 %809 = fsub nsz float 1.000000e+00, %807 %810 = fmul nsz float %807, 0x3FEFC00000000000 %811 = fmul nsz float %809, 7.812500e-03 %812 = fadd nsz float %810, %811 %813 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %812, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %814 = fadd nsz float %TEMP5.x.17, %813 %815 = fmul nsz float %40, -3.000000e+00 %816 = fadd nsz float %815, %44 %817 = fadd nsz float %42, %45 %818 = bitcast float %816 to i32 %819 = bitcast float %817 to i32 %820 = insertelement <2 x i32> undef, i32 %818, i32 0 %821 = insertelement <2 x i32> %820, i32 %819, i32 1 %822 = bitcast <2 x i32> %821 to <2 x float> %823 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %822, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %824 = extractelement <4 x float> %823, i32 0 %825 = extractelement <4 x float> %823, i32 1 %826 = fmul nsz float %813, %824 %827 = fadd nsz float %826, %TEMP0.x.17 %828 = fmul nsz float %813, %825 %829 = fadd nsz float %828, %TEMP0.y.17 br label %endif385 endif385: ; preds = %if376, %endif370 %TEMP5.x.18 = phi float [ %814, %if376 ], [ %TEMP5.x.17, %endif370 ] %TEMP0.y.18 = phi float [ %829, %if376 ], [ %TEMP0.y.17, %endif370 ] %TEMP0.x.18 = phi float [ %827, %if376 ], [ %TEMP0.x.17, %endif370 ] %830 = fadd nsz float %192, %804 %831 = call nsz float @llvm.sqrt.f32(float %830) #2 %832 = fmul nsz float %831, 0x3FD3C36BE0000000 %833 = fcmp nsz olt float %832, 1.000000e+00 br i1 %833, label %if391, label %endif400 if391: ; preds = %endif385 %834 = fsub nsz float 1.000000e+00, %832 %835 = fmul nsz float %832, 0x3FEFC00000000000 %836 = fmul nsz float %834, 7.812500e-03 %837 = fadd nsz float %835, %836 %838 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %837, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %839 = fadd nsz float %TEMP5.x.18, %838 %840 = fmul nsz float %40, -2.000000e+00 %841 = fadd nsz float %840, %44 %842 = fadd nsz float %42, %45 %843 = bitcast float %841 to i32 %844 = bitcast float %842 to i32 %845 = insertelement <2 x i32> undef, i32 %843, i32 0 %846 = insertelement <2 x i32> %845, i32 %844, i32 1 %847 = bitcast <2 x i32> %846 to <2 x float> %848 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %847, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %849 = extractelement <4 x float> %848, i32 0 %850 = extractelement <4 x float> %848, i32 1 %851 = fmul nsz float %838, %849 %852 = fadd nsz float %851, %TEMP0.x.18 %853 = fmul nsz float %838, %850 %854 = fadd nsz float %853, %TEMP0.y.18 br label %endif400 endif400: ; preds = %if391, %endif385 %TEMP5.x.19 = phi float [ %839, %if391 ], [ %TEMP5.x.18, %endif385 ] %TEMP0.y.19 = phi float [ %854, %if391 ], [ %TEMP0.y.18, %endif385 ] %TEMP0.x.19 = phi float [ %852, %if391 ], [ %TEMP0.x.18, %endif385 ] %855 = fadd nsz float %48, %804 %856 = call nsz float @llvm.sqrt.f32(float %855) #2 %857 = fmul nsz float %856, 0x3FD3C36BE0000000 %858 = fsub nsz float 1.000000e+00, %857 %859 = fmul nsz float %857, 0x3FEFC00000000000 %860 = fmul nsz float %858, 7.812500e-03 %861 = fadd nsz float %859, %860 %862 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %861, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %863 = fadd nsz float %TEMP5.x.19, %862 %864 = fadd nsz float %42, %45 %865 = bitcast float %864 to i32 %866 = insertelement <2 x i32> %690, i32 %865, i32 1 %867 = bitcast <2 x i32> %866 to <2 x float> %868 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %867, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %869 = extractelement <4 x float> %868, i32 0 %870 = extractelement <4 x float> %868, i32 1 %871 = fmul nsz float %862, %869 %872 = fadd nsz float %871, %TEMP0.x.19 %873 = fmul nsz float %862, %870 %874 = fadd nsz float %873, %TEMP0.y.19 %875 = fadd nsz float %81, %804 %876 = call nsz float @llvm.sqrt.f32(float %875) #2 %877 = fmul nsz float %876, 0x3FD3C36BE0000000 %878 = fsub nsz float 1.000000e+00, %877 %879 = fmul nsz float %877, 0x3FEFC00000000000 %880 = fmul nsz float %878, 7.812500e-03 %881 = fadd nsz float %879, %880 %882 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %881, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %883 = fadd nsz float %863, %882 %884 = insertelement <2 x i32> %522, i32 %865, i32 1 %885 = bitcast <2 x i32> %884 to <2 x float> %886 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %885, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %887 = extractelement <4 x float> %886, i32 0 %888 = extractelement <4 x float> %886, i32 1 %889 = fmul nsz float %882, %887 %890 = fadd nsz float %889, %872 %891 = fmul nsz float %882, %888 %892 = fadd nsz float %891, %874 %893 = fadd nsz float %118, %804 %894 = call nsz float @llvm.sqrt.f32(float %893) #2 %895 = fmul nsz float %894, 0x3FD3C36BE0000000 %896 = fsub nsz float 1.000000e+00, %895 %897 = fmul nsz float %895, 0x3FEFC00000000000 %898 = fmul nsz float %896, 7.812500e-03 %899 = fadd nsz float %897, %898 %900 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %899, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %901 = fadd nsz float %883, %900 %902 = insertelement <2 x i32> %543, i32 %865, i32 1 %903 = bitcast <2 x i32> %902 to <2 x float> %904 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %903, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %905 = extractelement <4 x float> %904, i32 0 %906 = extractelement <4 x float> %904, i32 1 %907 = fmul nsz float %900, %905 %908 = fadd nsz float %907, %890 %909 = fmul nsz float %900, %906 %910 = fadd nsz float %909, %892 %911 = fadd nsz float %154, %804 %912 = call nsz float @llvm.sqrt.f32(float %911) #2 %913 = fmul nsz float %912, 0x3FD3C36BE0000000 %914 = fsub nsz float 1.000000e+00, %913 %915 = fmul nsz float %913, 0x3FEFC00000000000 %916 = fmul nsz float %914, 7.812500e-03 %917 = fadd nsz float %915, %916 %918 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %917, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %919 = fadd nsz float %901, %918 %920 = insertelement <2 x i32> %565, i32 %865, i32 1 %921 = bitcast <2 x i32> %920 to <2 x float> %922 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %921, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %923 = extractelement <4 x float> %922, i32 0 %924 = extractelement <4 x float> %922, i32 1 %925 = fmul nsz float %918, %923 %926 = fadd nsz float %925, %908 %927 = fmul nsz float %918, %924 %928 = fadd nsz float %927, %910 %929 = fadd nsz float %368, %804 %930 = call nsz float @llvm.sqrt.f32(float %929) #2 %931 = fmul nsz float %930, 0x3FD3C36BE0000000 %932 = fcmp nsz olt float %931, 1.000000e+00 br i1 %932, label %if454, label %endif463 if454: ; preds = %endif400 %933 = fsub nsz float 1.000000e+00, %931 %934 = fmul nsz float %931, 0x3FEFC00000000000 %935 = fmul nsz float %933, 7.812500e-03 %936 = fadd nsz float %934, %935 %937 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %936, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %938 = fadd nsz float %919, %937 %939 = fmul nsz float %40, 3.000000e+00 %940 = fadd nsz float %939, %44 %941 = bitcast float %940 to i32 %942 = insertelement <2 x i32> undef, i32 %941, i32 0 %943 = insertelement <2 x i32> %942, i32 %865, i32 1 %944 = bitcast <2 x i32> %943 to <2 x float> %945 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %944, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %946 = extractelement <4 x float> %945, i32 0 %947 = extractelement <4 x float> %945, i32 1 %948 = fmul nsz float %937, %946 %949 = fadd nsz float %948, %926 %950 = fmul nsz float %937, %947 %951 = fadd nsz float %950, %928 br label %endif463 endif463: ; preds = %if454, %endif400 %TEMP5.x.20 = phi float [ %938, %if454 ], [ %919, %endif400 ] %TEMP0.y.20 = phi float [ %951, %if454 ], [ %928, %endif400 ] %TEMP0.x.20 = phi float [ %949, %if454 ], [ %926, %endif400 ] %952 = fadd nsz float %599, %804 %953 = call nsz float @llvm.sqrt.f32(float %952) #2 %954 = fmul nsz float %953, 0x3FD3C36BE0000000 %955 = fcmp nsz olt float %954, 1.000000e+00 br i1 %955, label %if469, label %endif478 if469: ; preds = %endif463 %956 = fsub nsz float 1.000000e+00, %954 %957 = fmul nsz float %954, 0x3FEFC00000000000 %958 = fmul nsz float %956, 7.812500e-03 %959 = fadd nsz float %957, %958 %960 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %959, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %961 = fadd nsz float %TEMP5.x.20, %960 %962 = fmul nsz float %40, 4.000000e+00 %963 = fadd nsz float %962, %44 %964 = bitcast float %963 to i32 %965 = insertelement <2 x i32> undef, i32 %964, i32 0 %966 = insertelement <2 x i32> %965, i32 %865, i32 1 %967 = bitcast <2 x i32> %966 to <2 x float> %968 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %967, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %969 = extractelement <4 x float> %968, i32 0 %970 = extractelement <4 x float> %968, i32 1 %971 = fmul nsz float %960, %969 %972 = fadd nsz float %971, %TEMP0.x.20 %973 = fmul nsz float %960, %970 %974 = fadd nsz float %973, %TEMP0.y.20 br label %endif478 endif478: ; preds = %if469, %endif463 %TEMP5.x.21 = phi float [ %961, %if469 ], [ %TEMP5.x.20, %endif463 ] %TEMP0.y.21 = phi float [ %974, %if469 ], [ %TEMP0.y.20, %endif463 ] %TEMP0.x.21 = phi float [ %972, %if469 ], [ %TEMP0.x.20, %endif463 ] %975 = fsub nsz float 2.000000e+00, %39 %976 = fmul nsz float %975, %975 %977 = fadd nsz float %406, %976 %978 = call nsz float @llvm.sqrt.f32(float %977) #2 %979 = fmul nsz float %978, 0x3FD3C36BE0000000 %980 = fcmp nsz olt float %979, 1.000000e+00 br i1 %980, label %if484, label %endif493 if484: ; preds = %endif478 %981 = fsub nsz float 1.000000e+00, %979 %982 = fmul nsz float %979, 0x3FEFC00000000000 %983 = fmul nsz float %981, 7.812500e-03 %984 = fadd nsz float %982, %983 %985 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %984, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %986 = fadd nsz float %TEMP5.x.21, %985 %987 = fmul nsz float %40, -3.000000e+00 %988 = fadd nsz float %987, %44 %989 = fmul nsz float %42, 2.000000e+00 %990 = fadd nsz float %989, %45 %991 = bitcast float %988 to i32 %992 = bitcast float %990 to i32 %993 = insertelement <2 x i32> undef, i32 %991, i32 0 %994 = insertelement <2 x i32> %993, i32 %992, i32 1 %995 = bitcast <2 x i32> %994 to <2 x float> %996 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %995, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %997 = extractelement <4 x float> %996, i32 0 %998 = extractelement <4 x float> %996, i32 1 %999 = fmul nsz float %985, %997 %1000 = fadd nsz float %999, %TEMP0.x.21 %1001 = fmul nsz float %985, %998 %1002 = fadd nsz float %1001, %TEMP0.y.21 br label %endif493 endif493: ; preds = %if484, %endif478 %TEMP5.x.22 = phi float [ %986, %if484 ], [ %TEMP5.x.21, %endif478 ] %TEMP0.y.22 = phi float [ %1002, %if484 ], [ %TEMP0.y.21, %endif478 ] %TEMP0.x.22 = phi float [ %1000, %if484 ], [ %TEMP0.x.21, %endif478 ] %1003 = fadd nsz float %192, %976 %1004 = call nsz float @llvm.sqrt.f32(float %1003) #2 %1005 = fmul nsz float %1004, 0x3FD3C36BE0000000 %1006 = fcmp nsz olt float %1005, 1.000000e+00 br i1 %1006, label %if499, label %endif508 if499: ; preds = %endif493 %1007 = fsub nsz float 1.000000e+00, %1005 %1008 = fmul nsz float %1005, 0x3FEFC00000000000 %1009 = fmul nsz float %1007, 7.812500e-03 %1010 = fadd nsz float %1008, %1009 %1011 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1010, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1012 = fadd nsz float %TEMP5.x.22, %1011 %1013 = fmul nsz float %40, -2.000000e+00 %1014 = fadd nsz float %1013, %44 %1015 = fmul nsz float %42, 2.000000e+00 %1016 = fadd nsz float %1015, %45 %1017 = bitcast float %1014 to i32 %1018 = bitcast float %1016 to i32 %1019 = insertelement <2 x i32> undef, i32 %1017, i32 0 %1020 = insertelement <2 x i32> %1019, i32 %1018, i32 1 %1021 = bitcast <2 x i32> %1020 to <2 x float> %1022 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1021, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1023 = extractelement <4 x float> %1022, i32 0 %1024 = extractelement <4 x float> %1022, i32 1 %1025 = fmul nsz float %1011, %1023 %1026 = fadd nsz float %1025, %TEMP0.x.22 %1027 = fmul nsz float %1011, %1024 %1028 = fadd nsz float %1027, %TEMP0.y.22 br label %endif508 endif508: ; preds = %if499, %endif493 %TEMP5.x.23 = phi float [ %1012, %if499 ], [ %TEMP5.x.22, %endif493 ] %TEMP0.y.23 = phi float [ %1028, %if499 ], [ %TEMP0.y.22, %endif493 ] %TEMP0.x.23 = phi float [ %1026, %if499 ], [ %TEMP0.x.22, %endif493 ] %1029 = fadd nsz float %48, %976 %1030 = call nsz float @llvm.sqrt.f32(float %1029) #2 %1031 = fmul nsz float %1030, 0x3FD3C36BE0000000 %1032 = fsub nsz float 1.000000e+00, %1031 %1033 = fmul nsz float %1031, 0x3FEFC00000000000 %1034 = fmul nsz float %1032, 7.812500e-03 %1035 = fadd nsz float %1033, %1034 %1036 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1035, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1037 = fadd nsz float %TEMP5.x.23, %1036 %1038 = fmul nsz float %42, 2.000000e+00 %1039 = fadd nsz float %1038, %45 %1040 = bitcast float %1039 to i32 %1041 = insertelement <2 x i32> %690, i32 %1040, i32 1 %1042 = bitcast <2 x i32> %1041 to <2 x float> %1043 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1042, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1044 = extractelement <4 x float> %1043, i32 0 %1045 = extractelement <4 x float> %1043, i32 1 %1046 = fmul nsz float %1036, %1044 %1047 = fadd nsz float %1046, %TEMP0.x.23 %1048 = fmul nsz float %1036, %1045 %1049 = fadd nsz float %1048, %TEMP0.y.23 %1050 = fadd nsz float %81, %976 %1051 = call nsz float @llvm.sqrt.f32(float %1050) #2 %1052 = fmul nsz float %1051, 0x3FD3C36BE0000000 %1053 = fsub nsz float 1.000000e+00, %1052 %1054 = fmul nsz float %1052, 0x3FEFC00000000000 %1055 = fmul nsz float %1053, 7.812500e-03 %1056 = fadd nsz float %1054, %1055 %1057 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1056, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1058 = fadd nsz float %1037, %1057 %1059 = insertelement <2 x i32> %522, i32 %1040, i32 1 %1060 = bitcast <2 x i32> %1059 to <2 x float> %1061 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1060, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1062 = extractelement <4 x float> %1061, i32 0 %1063 = extractelement <4 x float> %1061, i32 1 %1064 = fmul nsz float %1057, %1062 %1065 = fadd nsz float %1064, %1047 %1066 = fmul nsz float %1057, %1063 %1067 = fadd nsz float %1066, %1049 %1068 = fadd nsz float %118, %976 %1069 = call nsz float @llvm.sqrt.f32(float %1068) #2 %1070 = fmul nsz float %1069, 0x3FD3C36BE0000000 %1071 = fsub nsz float 1.000000e+00, %1070 %1072 = fmul nsz float %1070, 0x3FEFC00000000000 %1073 = fmul nsz float %1071, 7.812500e-03 %1074 = fadd nsz float %1072, %1073 %1075 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1074, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1076 = fadd nsz float %1058, %1075 %1077 = insertelement <2 x i32> %543, i32 %1040, i32 1 %1078 = bitcast <2 x i32> %1077 to <2 x float> %1079 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1078, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1080 = extractelement <4 x float> %1079, i32 0 %1081 = extractelement <4 x float> %1079, i32 1 %1082 = fmul nsz float %1075, %1080 %1083 = fadd nsz float %1082, %1065 %1084 = fmul nsz float %1075, %1081 %1085 = fadd nsz float %1084, %1067 %1086 = fadd nsz float %154, %976 %1087 = call nsz float @llvm.sqrt.f32(float %1086) #2 %1088 = fmul nsz float %1087, 0x3FD3C36BE0000000 %1089 = fsub nsz float 1.000000e+00, %1088 %1090 = fmul nsz float %1088, 0x3FEFC00000000000 %1091 = fmul nsz float %1089, 7.812500e-03 %1092 = fadd nsz float %1090, %1091 %1093 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1092, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1094 = fadd nsz float %1076, %1093 %1095 = insertelement <2 x i32> %565, i32 %1040, i32 1 %1096 = bitcast <2 x i32> %1095 to <2 x float> %1097 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1096, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1098 = extractelement <4 x float> %1097, i32 0 %1099 = extractelement <4 x float> %1097, i32 1 %1100 = fmul nsz float %1093, %1098 %1101 = fadd nsz float %1100, %1083 %1102 = fmul nsz float %1093, %1099 %1103 = fadd nsz float %1102, %1085 %1104 = fadd nsz float %368, %976 %1105 = call nsz float @llvm.sqrt.f32(float %1104) #2 %1106 = fmul nsz float %1105, 0x3FD3C36BE0000000 %1107 = fcmp nsz olt float %1106, 1.000000e+00 br i1 %1107, label %if562, label %endif571 if562: ; preds = %endif508 %1108 = fsub nsz float 1.000000e+00, %1106 %1109 = fmul nsz float %1106, 0x3FEFC00000000000 %1110 = fmul nsz float %1108, 7.812500e-03 %1111 = fadd nsz float %1109, %1110 %1112 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1111, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1113 = fadd nsz float %1094, %1112 %1114 = fmul nsz float %40, 3.000000e+00 %1115 = fadd nsz float %1114, %44 %1116 = bitcast float %1115 to i32 %1117 = insertelement <2 x i32> undef, i32 %1116, i32 0 %1118 = insertelement <2 x i32> %1117, i32 %1040, i32 1 %1119 = bitcast <2 x i32> %1118 to <2 x float> %1120 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1119, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1121 = extractelement <4 x float> %1120, i32 0 %1122 = extractelement <4 x float> %1120, i32 1 %1123 = fmul nsz float %1112, %1121 %1124 = fadd nsz float %1123, %1101 %1125 = fmul nsz float %1112, %1122 %1126 = fadd nsz float %1125, %1103 br label %endif571 endif571: ; preds = %if562, %endif508 %TEMP5.x.24 = phi float [ %1113, %if562 ], [ %1094, %endif508 ] %TEMP0.y.24 = phi float [ %1126, %if562 ], [ %1103, %endif508 ] %TEMP0.x.24 = phi float [ %1124, %if562 ], [ %1101, %endif508 ] %1127 = fadd nsz float %599, %976 %1128 = call nsz float @llvm.sqrt.f32(float %1127) #2 %1129 = fmul nsz float %1128, 0x3FD3C36BE0000000 %1130 = fcmp nsz olt float %1129, 1.000000e+00 br i1 %1130, label %if577, label %endif586 if577: ; preds = %endif571 %1131 = fsub nsz float 1.000000e+00, %1129 %1132 = fmul nsz float %1129, 0x3FEFC00000000000 %1133 = fmul nsz float %1131, 7.812500e-03 %1134 = fadd nsz float %1132, %1133 %1135 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1134, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1136 = fadd nsz float %TEMP5.x.24, %1135 %1137 = fmul nsz float %40, 4.000000e+00 %1138 = fadd nsz float %1137, %44 %1139 = bitcast float %1138 to i32 %1140 = insertelement <2 x i32> undef, i32 %1139, i32 0 %1141 = insertelement <2 x i32> %1140, i32 %1040, i32 1 %1142 = bitcast <2 x i32> %1141 to <2 x float> %1143 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1142, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1144 = extractelement <4 x float> %1143, i32 0 %1145 = extractelement <4 x float> %1143, i32 1 %1146 = fmul nsz float %1135, %1144 %1147 = fadd nsz float %1146, %TEMP0.x.24 %1148 = fmul nsz float %1135, %1145 %1149 = fadd nsz float %1148, %TEMP0.y.24 br label %endif586 endif586: ; preds = %if577, %endif571 %TEMP5.x.25 = phi float [ %1136, %if577 ], [ %TEMP5.x.24, %endif571 ] %TEMP0.y.25 = phi float [ %1149, %if577 ], [ %TEMP0.y.24, %endif571 ] %TEMP0.x.25 = phi float [ %1147, %if577 ], [ %TEMP0.x.24, %endif571 ] %1150 = fsub nsz float 3.000000e+00, %39 %1151 = fmul nsz float %1150, %1150 %1152 = fadd nsz float %192, %1151 %1153 = call nsz float @llvm.sqrt.f32(float %1152) #2 %1154 = fmul nsz float %1153, 0x3FD3C36BE0000000 %1155 = fcmp nsz olt float %1154, 1.000000e+00 br i1 %1155, label %if592, label %endif601 if592: ; preds = %endif586 %1156 = fsub nsz float 1.000000e+00, %1154 %1157 = fmul nsz float %1154, 0x3FEFC00000000000 %1158 = fmul nsz float %1156, 7.812500e-03 %1159 = fadd nsz float %1157, %1158 %1160 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1159, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1161 = fadd nsz float %TEMP5.x.25, %1160 %1162 = fmul nsz float %40, -2.000000e+00 %1163 = fadd nsz float %1162, %44 %1164 = fmul nsz float %42, 3.000000e+00 %1165 = fadd nsz float %1164, %45 %1166 = bitcast float %1163 to i32 %1167 = bitcast float %1165 to i32 %1168 = insertelement <2 x i32> undef, i32 %1166, i32 0 %1169 = insertelement <2 x i32> %1168, i32 %1167, i32 1 %1170 = bitcast <2 x i32> %1169 to <2 x float> %1171 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1170, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1172 = extractelement <4 x float> %1171, i32 0 %1173 = extractelement <4 x float> %1171, i32 1 %1174 = fmul nsz float %1160, %1172 %1175 = fadd nsz float %1174, %TEMP0.x.25 %1176 = fmul nsz float %1160, %1173 %1177 = fadd nsz float %1176, %TEMP0.y.25 br label %endif601 endif601: ; preds = %if592, %endif586 %TEMP5.x.26 = phi float [ %1161, %if592 ], [ %TEMP5.x.25, %endif586 ] %TEMP0.y.26 = phi float [ %1177, %if592 ], [ %TEMP0.y.25, %endif586 ] %TEMP0.x.26 = phi float [ %1175, %if592 ], [ %TEMP0.x.25, %endif586 ] %1178 = fadd nsz float %48, %1151 %1179 = call nsz float @llvm.sqrt.f32(float %1178) #2 %1180 = fmul nsz float %1179, 0x3FD3C36BE0000000 %1181 = fcmp nsz olt float %1180, 1.000000e+00 br i1 %1181, label %if607, label %endif616 if607: ; preds = %endif601 %1182 = fsub nsz float 1.000000e+00, %1180 %1183 = fmul nsz float %1180, 0x3FEFC00000000000 %1184 = fmul nsz float %1182, 7.812500e-03 %1185 = fadd nsz float %1183, %1184 %1186 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1185, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1187 = fadd nsz float %TEMP5.x.26, %1186 %1188 = fmul nsz float %42, 3.000000e+00 %1189 = fadd nsz float %1188, %45 %1190 = bitcast float %1189 to i32 %1191 = insertelement <2 x i32> %690, i32 %1190, i32 1 %1192 = bitcast <2 x i32> %1191 to <2 x float> %1193 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1192, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1194 = extractelement <4 x float> %1193, i32 0 %1195 = extractelement <4 x float> %1193, i32 1 %1196 = fmul nsz float %1186, %1194 %1197 = fadd nsz float %1196, %TEMP0.x.26 %1198 = fmul nsz float %1186, %1195 %1199 = fadd nsz float %1198, %TEMP0.y.26 br label %endif616 endif616: ; preds = %if607, %endif601 %TEMP5.x.27 = phi float [ %1187, %if607 ], [ %TEMP5.x.26, %endif601 ] %TEMP0.y.27 = phi float [ %1199, %if607 ], [ %TEMP0.y.26, %endif601 ] %TEMP0.x.27 = phi float [ %1197, %if607 ], [ %TEMP0.x.26, %endif601 ] %1200 = fadd nsz float %81, %1151 %1201 = call nsz float @llvm.sqrt.f32(float %1200) #2 %1202 = fmul nsz float %1201, 0x3FD3C36BE0000000 %1203 = fcmp nsz olt float %1202, 1.000000e+00 br i1 %1203, label %if622, label %endif631 if622: ; preds = %endif616 %1204 = fsub nsz float 1.000000e+00, %1202 %1205 = fmul nsz float %1202, 0x3FEFC00000000000 %1206 = fmul nsz float %1204, 7.812500e-03 %1207 = fadd nsz float %1205, %1206 %1208 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1207, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1209 = fadd nsz float %TEMP5.x.27, %1208 %1210 = fmul nsz float %42, 3.000000e+00 %1211 = fadd nsz float %1210, %45 %1212 = bitcast float %1211 to i32 %1213 = insertelement <2 x i32> %522, i32 %1212, i32 1 %1214 = bitcast <2 x i32> %1213 to <2 x float> %1215 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1214, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1216 = extractelement <4 x float> %1215, i32 0 %1217 = extractelement <4 x float> %1215, i32 1 %1218 = fmul nsz float %1208, %1216 %1219 = fadd nsz float %1218, %TEMP0.x.27 %1220 = fmul nsz float %1208, %1217 %1221 = fadd nsz float %1220, %TEMP0.y.27 br label %endif631 endif631: ; preds = %if622, %endif616 %TEMP5.x.28 = phi float [ %1209, %if622 ], [ %TEMP5.x.27, %endif616 ] %TEMP0.y.28 = phi float [ %1221, %if622 ], [ %TEMP0.y.27, %endif616 ] %TEMP0.x.28 = phi float [ %1219, %if622 ], [ %TEMP0.x.27, %endif616 ] %1222 = fadd nsz float %118, %1151 %1223 = call nsz float @llvm.sqrt.f32(float %1222) #2 %1224 = fmul nsz float %1223, 0x3FD3C36BE0000000 %1225 = fcmp nsz olt float %1224, 1.000000e+00 br i1 %1225, label %if637, label %endif646 if637: ; preds = %endif631 %1226 = fsub nsz float 1.000000e+00, %1224 %1227 = fmul nsz float %1224, 0x3FEFC00000000000 %1228 = fmul nsz float %1226, 7.812500e-03 %1229 = fadd nsz float %1227, %1228 %1230 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1229, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1231 = fadd nsz float %TEMP5.x.28, %1230 %1232 = fmul nsz float %42, 3.000000e+00 %1233 = fadd nsz float %1232, %45 %1234 = bitcast float %1233 to i32 %1235 = insertelement <2 x i32> %543, i32 %1234, i32 1 %1236 = bitcast <2 x i32> %1235 to <2 x float> %1237 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1236, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1238 = extractelement <4 x float> %1237, i32 0 %1239 = extractelement <4 x float> %1237, i32 1 %1240 = fmul nsz float %1230, %1238 %1241 = fadd nsz float %1240, %TEMP0.x.28 %1242 = fmul nsz float %1230, %1239 %1243 = fadd nsz float %1242, %TEMP0.y.28 br label %endif646 endif646: ; preds = %if637, %endif631 %TEMP5.x.29 = phi float [ %1231, %if637 ], [ %TEMP5.x.28, %endif631 ] %TEMP0.y.29 = phi float [ %1243, %if637 ], [ %TEMP0.y.28, %endif631 ] %TEMP0.x.29 = phi float [ %1241, %if637 ], [ %TEMP0.x.28, %endif631 ] %1244 = fadd nsz float %154, %1151 %1245 = call nsz float @llvm.sqrt.f32(float %1244) #2 %1246 = fmul nsz float %1245, 0x3FD3C36BE0000000 %1247 = fcmp nsz olt float %1246, 1.000000e+00 br i1 %1247, label %if652, label %endif661 if652: ; preds = %endif646 %1248 = fsub nsz float 1.000000e+00, %1246 %1249 = fmul nsz float %1246, 0x3FEFC00000000000 %1250 = fmul nsz float %1248, 7.812500e-03 %1251 = fadd nsz float %1249, %1250 %1252 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1251, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1253 = fadd nsz float %TEMP5.x.29, %1252 %1254 = fmul nsz float %42, 3.000000e+00 %1255 = fadd nsz float %1254, %45 %1256 = bitcast float %1255 to i32 %1257 = insertelement <2 x i32> %565, i32 %1256, i32 1 %1258 = bitcast <2 x i32> %1257 to <2 x float> %1259 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1258, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1260 = extractelement <4 x float> %1259, i32 0 %1261 = extractelement <4 x float> %1259, i32 1 %1262 = fmul nsz float %1252, %1260 %1263 = fadd nsz float %1262, %TEMP0.x.29 %1264 = fmul nsz float %1252, %1261 %1265 = fadd nsz float %1264, %TEMP0.y.29 br label %endif661 endif661: ; preds = %if652, %endif646 %TEMP5.x.30 = phi float [ %1253, %if652 ], [ %TEMP5.x.29, %endif646 ] %TEMP0.y.30 = phi float [ %1265, %if652 ], [ %TEMP0.y.29, %endif646 ] %TEMP0.x.30 = phi float [ %1263, %if652 ], [ %TEMP0.x.29, %endif646 ] %1266 = fadd nsz float %368, %1151 %1267 = call nsz float @llvm.sqrt.f32(float %1266) #2 %1268 = fmul nsz float %1267, 0x3FD3C36BE0000000 %1269 = fcmp nsz olt float %1268, 1.000000e+00 br i1 %1269, label %if667, label %endif676 if667: ; preds = %endif661 %1270 = fsub nsz float 1.000000e+00, %1268 %1271 = fmul nsz float %1268, 0x3FEFC00000000000 %1272 = fmul nsz float %1270, 7.812500e-03 %1273 = fadd nsz float %1271, %1272 %1274 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1273, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1275 = fadd nsz float %TEMP5.x.30, %1274 %1276 = fmul nsz float %40, 3.000000e+00 %1277 = fadd nsz float %1276, %44 %1278 = fmul nsz float %42, 3.000000e+00 %1279 = fadd nsz float %1278, %45 %1280 = bitcast float %1277 to i32 %1281 = bitcast float %1279 to i32 %1282 = insertelement <2 x i32> undef, i32 %1280, i32 0 %1283 = insertelement <2 x i32> %1282, i32 %1281, i32 1 %1284 = bitcast <2 x i32> %1283 to <2 x float> %1285 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1284, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1286 = extractelement <4 x float> %1285, i32 0 %1287 = extractelement <4 x float> %1285, i32 1 %1288 = fmul nsz float %1274, %1286 %1289 = fadd nsz float %1288, %TEMP0.x.30 %1290 = fmul nsz float %1274, %1287 %1291 = fadd nsz float %1290, %TEMP0.y.30 br label %endif676 endif676: ; preds = %if667, %endif661 %TEMP5.x.31 = phi float [ %1275, %if667 ], [ %TEMP5.x.30, %endif661 ] %TEMP0.y.31 = phi float [ %1291, %if667 ], [ %TEMP0.y.30, %endif661 ] %TEMP0.x.31 = phi float [ %1289, %if667 ], [ %TEMP0.x.30, %endif661 ] %1292 = fsub nsz float 4.000000e+00, %39 %1293 = fmul nsz float %1292, %1292 %1294 = fadd nsz float %48, %1293 %1295 = call nsz float @llvm.sqrt.f32(float %1294) #2 %1296 = fmul nsz float %1295, 0x3FD3C36BE0000000 %1297 = fcmp nsz olt float %1296, 1.000000e+00 br i1 %1297, label %if682, label %endif691 if682: ; preds = %endif676 %1298 = fsub nsz float 1.000000e+00, %1296 %1299 = fmul nsz float %1296, 0x3FEFC00000000000 %1300 = fmul nsz float %1298, 7.812500e-03 %1301 = fadd nsz float %1299, %1300 %1302 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1301, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1303 = fadd nsz float %TEMP5.x.31, %1302 %1304 = fmul nsz float %42, 4.000000e+00 %1305 = fadd nsz float %1304, %45 %1306 = bitcast float %1305 to i32 %1307 = insertelement <2 x i32> %690, i32 %1306, i32 1 %1308 = bitcast <2 x i32> %1307 to <2 x float> %1309 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1308, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1310 = extractelement <4 x float> %1309, i32 0 %1311 = extractelement <4 x float> %1309, i32 1 %1312 = fmul nsz float %1302, %1310 %1313 = fadd nsz float %1312, %TEMP0.x.31 %1314 = fmul nsz float %1302, %1311 %1315 = fadd nsz float %1314, %TEMP0.y.31 br label %endif691 endif691: ; preds = %if682, %endif676 %TEMP5.x.32 = phi float [ %1303, %if682 ], [ %TEMP5.x.31, %endif676 ] %TEMP0.y.32 = phi float [ %1315, %if682 ], [ %TEMP0.y.31, %endif676 ] %TEMP0.x.32 = phi float [ %1313, %if682 ], [ %TEMP0.x.31, %endif676 ] %1316 = fadd nsz float %81, %1293 %1317 = call nsz float @llvm.sqrt.f32(float %1316) #2 %1318 = fmul nsz float %1317, 0x3FD3C36BE0000000 %1319 = fcmp nsz olt float %1318, 1.000000e+00 br i1 %1319, label %if697, label %endif706 if697: ; preds = %endif691 %1320 = fsub nsz float 1.000000e+00, %1318 %1321 = fmul nsz float %1318, 0x3FEFC00000000000 %1322 = fmul nsz float %1320, 7.812500e-03 %1323 = fadd nsz float %1321, %1322 %1324 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1323, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1325 = fadd nsz float %TEMP5.x.32, %1324 %1326 = fmul nsz float %42, 4.000000e+00 %1327 = fadd nsz float %1326, %45 %1328 = bitcast float %1327 to i32 %1329 = insertelement <2 x i32> %522, i32 %1328, i32 1 %1330 = bitcast <2 x i32> %1329 to <2 x float> %1331 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1330, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1332 = extractelement <4 x float> %1331, i32 0 %1333 = extractelement <4 x float> %1331, i32 1 %1334 = fmul nsz float %1324, %1332 %1335 = fadd nsz float %1334, %TEMP0.x.32 %1336 = fmul nsz float %1324, %1333 %1337 = fadd nsz float %1336, %TEMP0.y.32 br label %endif706 endif706: ; preds = %if697, %endif691 %TEMP5.x.33 = phi float [ %1325, %if697 ], [ %TEMP5.x.32, %endif691 ] %TEMP0.y.33 = phi float [ %1337, %if697 ], [ %TEMP0.y.32, %endif691 ] %TEMP0.x.33 = phi float [ %1335, %if697 ], [ %TEMP0.x.32, %endif691 ] %1338 = fadd nsz float %118, %1293 %1339 = call nsz float @llvm.sqrt.f32(float %1338) #2 %1340 = fmul nsz float %1339, 0x3FD3C36BE0000000 %1341 = fcmp nsz olt float %1340, 1.000000e+00 br i1 %1341, label %if712, label %endif721 if712: ; preds = %endif706 %1342 = fsub nsz float 1.000000e+00, %1340 %1343 = fmul nsz float %1340, 0x3FEFC00000000000 %1344 = fmul nsz float %1342, 7.812500e-03 %1345 = fadd nsz float %1343, %1344 %1346 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1345, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1347 = fadd nsz float %TEMP5.x.33, %1346 %1348 = fmul nsz float %42, 4.000000e+00 %1349 = fadd nsz float %1348, %45 %1350 = bitcast float %1349 to i32 %1351 = insertelement <2 x i32> %543, i32 %1350, i32 1 %1352 = bitcast <2 x i32> %1351 to <2 x float> %1353 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1352, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1354 = extractelement <4 x float> %1353, i32 0 %1355 = extractelement <4 x float> %1353, i32 1 %1356 = fmul nsz float %1346, %1354 %1357 = fadd nsz float %1356, %TEMP0.x.33 %1358 = fmul nsz float %1346, %1355 %1359 = fadd nsz float %1358, %TEMP0.y.33 br label %endif721 endif721: ; preds = %if712, %endif706 %TEMP5.x.34 = phi float [ %1347, %if712 ], [ %TEMP5.x.33, %endif706 ] %TEMP0.y.34 = phi float [ %1359, %if712 ], [ %TEMP0.y.33, %endif706 ] %TEMP0.x.34 = phi float [ %1357, %if712 ], [ %TEMP0.x.33, %endif706 ] %1360 = fadd nsz float %154, %1293 %1361 = call nsz float @llvm.sqrt.f32(float %1360) #2 %1362 = fmul nsz float %1361, 0x3FD3C36BE0000000 %1363 = fcmp nsz olt float %1362, 1.000000e+00 br i1 %1363, label %if727, label %endif736 if727: ; preds = %endif721 %1364 = fsub nsz float 1.000000e+00, %1362 %1365 = fmul nsz float %1362, 0x3FEFC00000000000 %1366 = fmul nsz float %1364, 7.812500e-03 %1367 = fadd nsz float %1365, %1366 %1368 = call float @llvm.amdgcn.image.sample.f32.f32.v8i32(float %1367, <8 x i32> %484, <4 x i32> %487, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %1369 = fadd nsz float %TEMP5.x.34, %1368 %1370 = fmul nsz float %42, 4.000000e+00 %1371 = fadd nsz float %1370, %45 %1372 = bitcast float %1371 to i32 %1373 = insertelement <2 x i32> %565, i32 %1372, i32 1 %1374 = bitcast <2 x i32> %1373 to <2 x float> %1375 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %1374, <8 x i32> %493, <4 x i32> %495, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #2 %1376 = extractelement <4 x float> %1375, i32 0 %1377 = extractelement <4 x float> %1375, i32 1 %1378 = fmul nsz float %1368, %1376 %1379 = fadd nsz float %1378, %TEMP0.x.34 %1380 = fmul nsz float %1368, %1377 %1381 = fadd nsz float %1380, %TEMP0.y.34 br label %endif736 endif736: ; preds = %if727, %endif721 %TEMP5.x.35 = phi float [ %1369, %if727 ], [ %TEMP5.x.34, %endif721 ] %TEMP0.y.35 = phi float [ %1381, %if727 ], [ %TEMP0.y.34, %endif721 ] %TEMP0.x.35 = phi float [ %1379, %if727 ], [ %TEMP0.x.34, %endif721 ] %1382 = fdiv nsz float 1.000000e+00, %TEMP5.x.35, !fpmath !1 %1383 = fmul nsz float %TEMP0.x.35, %1382 %1384 = fmul nsz float %TEMP0.y.35, %1382 %1385 = bitcast float %3 to i32 %1386 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %1385, 6 %1387 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1386, float %1383, 7 %1388 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1387, float %1384, 8 %1389 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1388, float 0.000000e+00, 9 %1390 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1389, float 1.000000e+00, 10 %1391 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1390, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1391 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readnone } attributes #3 = { nounwind readonly } !0 = !{} !1 = !{float 2.500000e+00} SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[4:7], s[6:7], 0x10 ; C00A0103 00000010 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v5, s[4:7], 0 idxen ; E00C2000 80010005 s_waitcnt vmcnt(0) ; BF8C0F70 buffer_load_format_xyzw v[2:5], v4, s[0:3], 0 idxen ; E00C2000 80000204 exp param0 v0, v1, v0, v0 ; C400020F 00000100 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v2, v3, v0, v0 done ; C40008CF 00000302 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x5 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[0:1], exec ; BE80017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx4 s[8:11], s[2:3], 0x100 ; C00A0201 00000100 s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p1_f32 v2, v2, attr0.y ; D4080102 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s7, s[8:11], 0x0 ; C02201C4 00000000 s_buffer_load_dword s12, s[8:11], 0x4 ; C0220304 00000004 v_interp_p2_f32 v2, v3, attr0.y ; D4090103 s_buffer_load_dword s2, s[8:11], 0x14 ; C0220084 00000014 s_buffer_load_dword s3, s[8:11], 0x10 ; C02200C4 00000010 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s7, -0.5 ; D1C10001 03C40F00 v_floor_f32_e32 v4, v1 ; 7E083F01 v_mad_f32 v3, v2, s12, -0.5 ; D1C10003 03C41902 v_subrev_f32_e32 v21, v4, v1 ; 062A0304 v_floor_f32_e32 v1, v3 ; 7E023F03 v_subrev_f32_e32 v3, v1, v3 ; 06060701 v_sub_f32_e32 v5, 0xc0400000, v3 ; 040A06FF C0400000 v_mul_f32_e32 v9, v5, v5 ; 0A120B05 v_sub_f32_e32 v4, -1.0, v21 ; 04082AF3 v_mad_f32 v1, -v3, s2, v2 ; D1C10001 24080503 v_mul_f32_e32 v2, v4, v4 ; 0A040904 v_mad_f32 v4, v4, v4, v9 ; D1C10004 04260904 v_sqrt_f32_e32 v4, v4 ; 7E084F04 v_mul_f32_e32 v6, 0x3e9e1b5f, v4 ; 0A0C08FF 3E9E1B5F v_mad_f32 v0, -v21, s3, v0 ; D1C10000 24000715 v_cmp_gt_f32_e32 vcc, 1.0, v6 ; 7C880CF2 v_mov_b32_e32 v11, 0 ; 7E160280 v_mov_b32_e32 v4, 0 ; 7E080280 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v4, 1.0, v6 ; 04080CF2 v_mul_f32_e32 v4, 0x3c000000, v4 ; 0A0808FF 3C000000 v_mac_f32_e32 v4, 0x3f7e0000, v6 ; 2C080CFF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v11, v4, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30B04 s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_mov_b32_e32 v5, 0xc0400000 ; 7E0A02FF C0400000 v_subrev_f32_e32 v4, s3, v0 ; 06080003 v_mad_f32 v5, s2, v5, v1 ; D1C10005 04060A02 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[5:6], v[4:5], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650504 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v5, v5, v11 ; 0A0A1705 v_mul_f32_e32 v4, v6, v11 ; 0A081706 s_or_b64 exec, exec, s[8:9] ; 87FE087E v_mad_f32 v7, v21, v21, v9 ; D1C10007 04262B15 v_sqrt_f32_e32 v7, v7 ; 7E0E4F07 v_mul_f32_e32 v7, 0x3e9e1b5f, v7 ; 0A0E0EFF 3E9E1B5F v_mul_f32_e32 v6, v21, v21 ; 0A0C2B15 v_cmp_gt_f32_e32 vcc, 1.0, v7 ; 7C880EF2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_4 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v8, 1.0, v7 ; 04100EF2 v_mul_f32_e32 v8, 0x3c000000, v8 ; 0A1010FF 3C000000 v_mac_f32_e32 v8, 0x3f7e0000, v7 ; 2C100EFF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v10, v8, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30A08 s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_mov_b32_e32 v8, 0xc0400000 ; 7E1002FF C0400000 v_mad_f32 v7, 0, s3, v0 ; D1C10007 04000680 v_mad_f32 v8, s2, v8, v1 ; D1C10008 04061002 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v10, v11 ; 0216170A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:8], v[7:8], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650707 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v7, v10 ; 2C0A1507 v_mac_f32_e32 v4, v8, v10 ; 2C081508 s_or_b64 exec, exec, s[8:9] ; 87FE087E v_sub_f32_e32 v8, 1.0, v21 ; 04102AF2 v_mul_f32_e32 v7, v8, v8 ; 0A0E1108 v_mad_f32 v8, v8, v8, v9 ; D1C10008 04261108 v_sqrt_f32_e32 v8, v8 ; 7E104F08 v_mul_f32_e32 v8, 0x3e9e1b5f, v8 ; 0A1010FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v8 ; 7C8810F2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_6 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v10, 1.0, v8 ; 041410F2 v_mul_f32_e32 v10, 0x3c000000, v10 ; 0A1414FF 3C000000 v_mac_f32_e32 v10, 0x3f7e0000, v8 ; 2C1410FF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v8, v10, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E3080A s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_mov_b32_e32 v10, 0xc0400000 ; 7E1402FF C0400000 v_add_f32_e32 v14, s3, v0 ; 021C0003 v_mad_f32 v15, s2, v10, v1 ; D1C1000F 04061402 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v8, v11 ; 02161708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:15], v[14:15], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650E0E s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v14, v8 ; 2C0A110E v_mac_f32_e32 v4, v15, v8 ; 2C08110F s_or_b64 exec, exec, s[8:9] ; 87FE087E v_sub_f32_e32 v10, 2.0, v21 ; 04142AF4 v_mac_f32_e32 v9, v10, v10 ; 2C12150A v_sqrt_f32_e32 v9, v9 ; 7E124F09 v_mul_f32_e32 v9, 0x3e9e1b5f, v9 ; 0A1212FF 3E9E1B5F v_mul_f32_e32 v8, v10, v10 ; 0A10150A v_cmp_gt_f32_e32 vcc, 1.0, v9 ; 7C8812F2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_8 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v10, 1.0, v9 ; 041412F2 v_mul_f32_e32 v10, 0x3c000000, v10 ; 0A1414FF 3C000000 v_mac_f32_e32 v10, 0x3f7e0000, v9 ; 2C1412FF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v12, v10, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30C0A s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_mov_b32_e32 v10, 0xc0400000 ; 7E1402FF C0400000 v_mad_f32 v9, 2.0, s3, v0 ; D1C10009 040006F4 v_mad_f32 v10, s2, v10, v1 ; D1C1000A 04061402 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v12, v11 ; 0216170C s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[9:10], v[9:10], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650909 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v9, v12 ; 2C0A1909 v_mac_f32_e32 v4, v10, v12 ; 2C08190A s_or_b64 exec, exec, s[8:9] ; 87FE087E v_sub_f32_e32 v12, -2.0, v3 ; 041806F5 v_sub_f32_e32 v10, -2.0, v21 ; 04142AF5 v_mul_f32_e32 v12, v12, v12 ; 0A18190C v_mul_f32_e32 v9, v10, v10 ; 0A12150A v_mad_f32 v10, v10, v10, v12 ; D1C1000A 0432150A v_sqrt_f32_e32 v10, v10 ; 7E144F0A v_mul_f32_e32 v10, 0x3e9e1b5f, v10 ; 0A1414FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v10 ; 7C8814F2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_10 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v14, 1.0, v10 ; 041C14F2 v_mul_f32_e32 v14, 0x3c000000, v14 ; 0A1C1CFF 3C000000 v_mac_f32_e32 v14, 0x3f7e0000, v10 ; 2C1C14FF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v10, v14, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30A0E s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_mad_f32 v14, -2.0, s3, v0 ; D1C1000E 040006F5 v_mad_f32 v15, -2.0, s2, v1 ; D1C1000F 040404F5 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v10, v11 ; 0216170A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:15], v[14:15], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650E0E s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v14, v10 ; 2C0A150E v_mac_f32_e32 v4, v15, v10 ; 2C08150F s_or_b64 exec, exec, s[8:9] ; 87FE087E v_add_f32_e32 v10, v12, v2 ; 0214050C v_sqrt_f32_e32 v10, v10 ; 7E144F0A v_mul_f32_e32 v10, 0x3e9e1b5f, v10 ; 0A1414FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v10 ; 7C8814F2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_12 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v14, 1.0, v10 ; 041C14F2 v_mul_f32_e32 v14, 0x3c000000, v14 ; 0A1C1CFF 3C000000 v_mac_f32_e32 v14, 0x3f7e0000, v10 ; 2C1C14FF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v10, v14, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30A0E s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_subrev_f32_e32 v14, s3, v0 ; 061C0003 v_mad_f32 v15, -2.0, s2, v1 ; D1C1000F 040404F5 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v10, v11 ; 0216170A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:15], v[14:15], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650E0E s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v14, v10 ; 2C0A150E v_mac_f32_e32 v4, v15, v10 ; 2C08150F s_or_b64 exec, exec, s[8:9] ; 87FE087E v_add_f32_e32 v10, v12, v6 ; 02140D0C v_sqrt_f32_e32 v10, v10 ; 7E144F0A v_mul_f32_e32 v10, 0x3e9e1b5f, v10 ; 0A1414FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v10 ; 7C8814F2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_14 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v14, 1.0, v10 ; 041C14F2 v_mul_f32_e32 v14, 0x3c000000, v14 ; 0A1C1CFF 3C000000 v_mac_f32_e32 v14, 0x3f7e0000, v10 ; 2C1C14FF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v10, v14, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30A0E s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_mad_f32 v14, 0, s3, v0 ; D1C1000E 04000680 v_mad_f32 v15, -2.0, s2, v1 ; D1C1000F 040404F5 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v10, v11 ; 0216170A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:15], v[14:15], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650E0E s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v14, v10 ; 2C0A150E v_mac_f32_e32 v4, v15, v10 ; 2C08150F s_or_b64 exec, exec, s[8:9] ; 87FE087E v_add_f32_e32 v10, v12, v7 ; 02140F0C v_sqrt_f32_e32 v10, v10 ; 7E144F0A v_mul_f32_e32 v10, 0x3e9e1b5f, v10 ; 0A1414FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v10 ; 7C8814F2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_16 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v14, 1.0, v10 ; 041C14F2 v_mul_f32_e32 v14, 0x3c000000, v14 ; 0A1C1CFF 3C000000 v_mac_f32_e32 v14, 0x3f7e0000, v10 ; 2C1C14FF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v10, v14, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30A0E s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_add_f32_e32 v14, s3, v0 ; 021C0003 v_mad_f32 v15, -2.0, s2, v1 ; D1C1000F 040404F5 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v10, v11 ; 0216170A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:15], v[14:15], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650E0E s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v14, v10 ; 2C0A150E v_mac_f32_e32 v4, v15, v10 ; 2C08150F s_or_b64 exec, exec, s[8:9] ; 87FE087E v_add_f32_e32 v10, v12, v8 ; 0214110C v_sqrt_f32_e32 v10, v10 ; 7E144F0A v_mul_f32_e32 v10, 0x3e9e1b5f, v10 ; 0A1414FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v10 ; 7C8814F2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_18 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v14, 1.0, v10 ; 041C14F2 v_mul_f32_e32 v14, 0x3c000000, v14 ; 0A1C1CFF 3C000000 v_mac_f32_e32 v14, 0x3f7e0000, v10 ; 2C1C14FF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v10, v14, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30A0E s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_mad_f32 v14, 2.0, s3, v0 ; D1C1000E 040006F4 v_mad_f32 v15, -2.0, s2, v1 ; D1C1000F 040404F5 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v10, v11 ; 0216170A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:15], v[14:15], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650E0E s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v14, v10 ; 2C0A150E v_mac_f32_e32 v4, v15, v10 ; 2C08150F s_or_b64 exec, exec, s[8:9] ; 87FE087E v_sub_f32_e32 v14, 0x40400000, v21 ; 041C2AFF 40400000 v_mac_f32_e32 v12, v14, v14 ; 2C181D0E v_sqrt_f32_e32 v12, v12 ; 7E184F0C v_mul_f32_e32 v12, 0x3e9e1b5f, v12 ; 0A1818FF 3E9E1B5F v_mul_f32_e32 v10, v14, v14 ; 0A141D0E v_cmp_gt_f32_e32 vcc, 1.0, v12 ; 7C8818F2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_20 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v14, 1.0, v12 ; 041C18F2 v_mul_f32_e32 v14, 0x3c000000, v14 ; 0A1C1CFF 3C000000 v_mac_f32_e32 v14, 0x3f7e0000, v12 ; 2C1C18FF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v12, v14, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30C0E s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_mov_b32_e32 v14, 0x40400000 ; 7E1C02FF 40400000 v_mad_f32 v14, s3, v14, v0 ; D1C1000E 04021C03 v_mad_f32 v15, -2.0, s2, v1 ; D1C1000F 040404F5 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v12, v11 ; 0216170C s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:15], v[14:15], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650E0E s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v14, v12 ; 2C0A190E v_mac_f32_e32 v4, v15, v12 ; 2C08190F s_or_b64 exec, exec, s[8:9] ; 87FE087E v_sub_f32_e32 v14, -1.0, v3 ; 041C06F3 v_sub_f32_e32 v12, 0xc0400000, v21 ; 04182AFF C0400000 v_mul_f32_e32 v22, v14, v14 ; 0A2C1D0E v_mul_f32_e32 v20, v12, v12 ; 0A28190C v_mad_f32 v12, v12, v12, v22 ; D1C1000C 045A190C v_sqrt_f32_e32 v12, v12 ; 7E184F0C v_mul_f32_e32 v12, 0x3e9e1b5f, v12 ; 0A1818FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v12 ; 7C8818F2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_22 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v14, 1.0, v12 ; 041C18F2 v_mul_f32_e32 v14, 0x3c000000, v14 ; 0A1C1CFF 3C000000 v_mac_f32_e32 v14, 0x3f7e0000, v12 ; 2C1C18FF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v12, v14, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30C0E s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_mov_b32_e32 v14, 0xc0400000 ; 7E1C02FF C0400000 v_mad_f32 v14, s3, v14, v0 ; D1C1000E 04021C03 v_subrev_f32_e32 v15, s2, v1 ; 061E0202 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v12, v11 ; 0216170C s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:15], v[14:15], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650E0E s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v14, v12 ; 2C0A190E v_mac_f32_e32 v4, v15, v12 ; 2C08190F s_or_b64 exec, exec, s[8:9] ; 87FE087E v_add_f32_e32 v12, v22, v9 ; 02181316 v_sqrt_f32_e32 v12, v12 ; 7E184F0C v_mul_f32_e32 v12, 0x3e9e1b5f, v12 ; 0A1818FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v12 ; 7C8818F2 s_and_saveexec_b64 s[8:9], vcc ; BE88206A s_xor_b64 s[8:9], exec, s[8:9] ; 8888087E s_cbranch_execz BB0_24 ; BF880000 s_load_dwordx8 s[12:19], s[4:5], 0x200 ; C00E0302 00000200 s_load_dwordx4 s[28:31], s[4:5], 0x230 ; C00A0702 00000230 v_sub_f32_e32 v14, 1.0, v12 ; 041C18F2 v_mul_f32_e32 v14, 0x3c000000, v14 ; 0A1C1CFF 3C000000 v_mac_f32_e32 v14, 0x3f7e0000, v12 ; 2C1C18FF 3F7E0000 s_load_dwordx8 s[20:27], s[4:5], 0x240 ; C00E0502 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v12, v14, s[12:19], s[28:31] dmask:0x1 ; F0800100 00E30C0E s_load_dwordx4 s[12:15], s[4:5], 0x270 ; C00A0302 00000270 v_mad_f32 v14, -2.0, s3, v0 ; D1C1000E 040006F5 v_subrev_f32_e32 v15, s2, v1 ; 061E0202 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v11, v12, v11 ; 0216170C s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:15], v[14:15], s[20:27], s[12:15] dmask:0x3 ; F0800300 00650E0E s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v14, v12 ; 2C0A190E v_mac_f32_e32 v4, v15, v12 ; 2C08190F s_or_b64 exec, exec, s[8:9] ; 87FE087E v_add_f32_e32 v12, v22, v2 ; 02180516 s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 s_load_dwordx4 s[24:27], s[4:5], 0x230 ; C00A0602 00000230 v_sqrt_f32_e32 v12, v12 ; 7E184F0C v_mov_b32_e32 v25, 0x3e9e1b5f ; 7E3202FF 3E9E1B5F s_load_dwordx8 s[16:23], s[4:5], 0x240 ; C00E0402 00000240 s_load_dwordx4 s[28:31], s[4:5], 0x270 ; C00A0702 00000270 v_mul_f32_e32 v14, v25, v12 ; 0A1C1919 v_mad_f32 v12, -v12, v25, 1.0 ; D1C1000C 23CA330C v_bfrev_b32_e32 v23, 60 ; 7E2E58BC v_mul_f32_e32 v12, v23, v12 ; 0A181917 v_mov_b32_e32 v24, 0x3f7e0000 ; 7E3002FF 3F7E0000 v_mac_f32_e32 v12, v24, v14 ; 2C181D18 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v14, v12, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20E0C v_subrev_f32_e32 v19, s2, v1 ; 06260202 v_subrev_f32_e32 v18, s3, v0 ; 06240003 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v15, v14, v11 ; 021E170E image_sample v[11:12], v[18:19], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E40B12 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v11, v14 ; 2C0A1D0B v_add_f32_e32 v11, v22, v6 ; 02160D16 v_sqrt_f32_e32 v11, v11 ; 7E164F0B v_mac_f32_e32 v4, v12, v14 ; 2C081D0C v_mul_f32_e32 v12, v25, v11 ; 0A181719 v_mad_f32 v11, -v11, v25, 1.0 ; D1C1000B 23CA330B v_mul_f32_e32 v11, v23, v11 ; 0A161717 v_mac_f32_e32 v11, v24, v12 ; 2C161918 v_mad_f32 v16, 0, s3, v0 ; D1C10010 04000680 v_mov_b32_e32 v17, v19 ; 7E220313 image_sample v14, v11, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20E0B image_sample v[11:12], v[16:17], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E40B10 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v11, v14 ; 2C0A1D0B v_add_f32_e32 v11, v22, v7 ; 02160F16 v_sqrt_f32_e32 v11, v11 ; 7E164F0B v_mac_f32_e32 v4, v12, v14 ; 2C081D0C v_mul_f32_e32 v12, v25, v11 ; 0A181719 v_mad_f32 v11, -v11, v25, 1.0 ; D1C1000B 23CA330B v_mul_f32_e32 v11, v23, v11 ; 0A161717 v_mac_f32_e32 v11, v24, v12 ; 2C161918 v_add_f32_e32 v15, v14, v15 ; 021E1F0E image_sample v17, v11, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C2110B s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v26, v17, v15 ; 02341F11 v_add_f32_e32 v14, s3, v0 ; 021C0003 v_mov_b32_e32 v15, v19 ; 7E1E0313 image_sample v[11:12], v[14:15], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E40B0E s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v11, v17 ; 2C0A230B v_add_f32_e32 v11, v22, v8 ; 02161116 v_sqrt_f32_e32 v11, v11 ; 7E164F0B v_mac_f32_e32 v4, v12, v17 ; 2C08230C v_mul_f32_e32 v12, v25, v11 ; 0A181719 v_mad_f32 v11, -v11, v25, 1.0 ; D1C1000B 23CA330B v_mul_f32_e32 v11, v23, v11 ; 0A161717 v_mac_f32_e32 v11, v24, v12 ; 2C161918 image_sample v17, v11, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C2110B v_mad_f32 v11, 2.0, s3, v0 ; D1C1000B 040006F4 v_mov_b32_e32 v12, v19 ; 7E180313 image_sample v[23:24], v[11:12], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E4170B v_add_f32_e32 v12, v22, v10 ; 02181516 v_sqrt_f32_e32 v12, v12 ; 7E184F0C v_mul_f32_e32 v12, v25, v12 ; 0A181919 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v15, v17, v26 ; 021E3511 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v17 ; 2C0A2317 v_mac_f32_e32 v4, v24, v17 ; 2C082318 v_cmp_gt_f32_e32 vcc, 1.0, v12 ; 7C8818F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_26 ; BF880000 v_sub_f32_e32 v17, 1.0, v12 ; 042218F2 v_mul_f32_e32 v17, 0x3c000000, v17 ; 0A2222FF 3C000000 v_mac_f32_e32 v17, 0x3f7e0000, v12 ; 2C2218FF 3F7E0000 image_sample v12, v17, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20C11 v_mov_b32_e32 v17, 0x40400000 ; 7E2202FF 40400000 v_mad_f32 v23, s3, v17, v0 ; D1C10017 04022203 v_mov_b32_e32 v24, v19 ; 7E300313 image_sample v[23:24], v[23:24], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41717 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v15, v12, v15 ; 021E1F0C s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v12 ; 2C0A1917 v_mac_f32_e32 v4, v24, v12 ; 2C081918 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_sub_f32_e32 v17, 4.0, v21 ; 04222AF6 v_mac_f32_e32 v22, v17, v17 ; 2C2C2311 v_mul_f32_e32 v12, v17, v17 ; 0A182311 v_sqrt_f32_e32 v17, v22 ; 7E224F16 v_mul_f32_e32 v17, 0x3e9e1b5f, v17 ; 0A2222FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v17 ; 7C8822F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_28 ; BF880000 v_sub_f32_e32 v21, 1.0, v17 ; 042A22F2 v_mul_f32_e32 v21, 0x3c000000, v21 ; 0A2A2AFF 3C000000 v_mac_f32_e32 v21, 0x3f7e0000, v17 ; 2C2A22FF 3F7E0000 image_sample v17, v21, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21115 v_mad_f32 v21, 4.0, s3, v0 ; D1C10015 040006F6 v_mov_b32_e32 v22, v19 ; 7E2C0313 image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v15, v17, v15 ; 021E1F11 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v17 ; 2C0A2315 v_mac_f32_e32 v4, v22, v17 ; 2C082316 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_mad_f32 v19, v3, v3, v20 ; D1C10013 04520703 v_sqrt_f32_e32 v19, v19 ; 7E264F13 v_mul_f32_e32 v19, 0x3e9e1b5f, v19 ; 0A2626FF 3E9E1B5F v_mul_f32_e32 v17, v3, v3 ; 0A220703 v_cmp_gt_f32_e32 vcc, 1.0, v19 ; 7C8826F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_30 ; BF880000 v_sub_f32_e32 v21, 1.0, v19 ; 042A26F2 v_mul_f32_e32 v21, 0x3c000000, v21 ; 0A2A2AFF 3C000000 v_mac_f32_e32 v21, 0x3f7e0000, v19 ; 2C2A26FF 3F7E0000 image_sample v19, v21, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21315 v_mov_b32_e32 v21, 0xc0400000 ; 7E2A02FF C0400000 v_mad_f32 v21, s3, v21, v0 ; D1C10015 04022A03 v_mad_f32 v22, 0, s2, v1 ; D1C10016 04040480 image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v15, v19, v15 ; 021E1F13 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v19 ; 2C0A2715 v_mac_f32_e32 v4, v22, v19 ; 2C082716 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v19, v17, v9 ; 02261311 v_sqrt_f32_e32 v19, v19 ; 7E264F13 v_mul_f32_e32 v19, 0x3e9e1b5f, v19 ; 0A2626FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v19 ; 7C8826F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_32 ; BF880000 v_sub_f32_e32 v21, 1.0, v19 ; 042A26F2 v_mul_f32_e32 v21, 0x3c000000, v21 ; 0A2A2AFF 3C000000 v_mac_f32_e32 v21, 0x3f7e0000, v19 ; 2C2A26FF 3F7E0000 image_sample v19, v21, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21315 v_mad_f32 v21, -2.0, s3, v0 ; D1C10015 040006F5 v_mad_f32 v22, 0, s2, v1 ; D1C10016 04040480 image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v15, v19, v15 ; 021E1F13 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v19 ; 2C0A2715 v_mac_f32_e32 v4, v22, v19 ; 2C082716 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v19, v17, v2 ; 02260511 v_sqrt_f32_e32 v19, v19 ; 7E264F13 v_mov_b32_e32 v21, 0x3e9e1b5f ; 7E2A02FF 3E9E1B5F v_mul_f32_e32 v22, v21, v19 ; 0A2C2715 v_mad_f32 v19, -v19, v21, 1.0 ; D1C10013 23CA2B13 v_bfrev_b32_e32 v25, 60 ; 7E3258BC v_mul_f32_e32 v19, v25, v19 ; 0A262719 v_mov_b32_e32 v26, 0x3f7e0000 ; 7E3402FF 3F7E0000 v_mac_f32_e32 v19, v26, v22 ; 2C262D1A image_sample v19, v19, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21313 s_waitcnt vmcnt(0) ; BF8C0F70 v_mov_b32_e32 v24, v19 ; 7E300313 v_mad_f32 v22, 0, s2, v1 ; D1C10016 04040480 v_mov_b32_e32 v23, v18 ; 7E2E0312 v_mov_b32_e32 v24, v22 ; 7E300316 image_sample v[23:24], v[23:24], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41717 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v19 ; 2C0A2717 v_add_f32_e32 v23, v17, v6 ; 022E0D11 v_sqrt_f32_e32 v23, v23 ; 7E2E4F17 v_mac_f32_e32 v4, v24, v19 ; 2C082718 v_mul_f32_e32 v24, v21, v23 ; 0A302F15 v_mad_f32 v23, -v23, v21, 1.0 ; D1C10017 23CA2B17 v_mul_f32_e32 v23, v25, v23 ; 0A2E2F19 v_mac_f32_e32 v23, v26, v24 ; 2C2E311A image_sample v27, v23, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21B17 image_sample v[23:24], v[0:1], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41700 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v27 ; 2C0A3717 v_add_f32_e32 v23, v17, v7 ; 022E0F11 v_sqrt_f32_e32 v23, v23 ; 7E2E4F17 v_mac_f32_e32 v4, v24, v27 ; 2C083718 v_mul_f32_e32 v24, v21, v23 ; 0A302F15 v_mad_f32 v23, -v23, v21, 1.0 ; D1C10017 23CA2B17 v_mul_f32_e32 v23, v25, v23 ; 0A2E2F19 v_mac_f32_e32 v23, v26, v24 ; 2C2E311A image_sample v28, v23, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21C17 v_mov_b32_e32 v24, v15 ; 7E30030F v_mov_b32_e32 v23, v14 ; 7E2E030E v_mov_b32_e32 v24, v22 ; 7E300316 image_sample v[23:24], v[23:24], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41717 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v28 ; 2C0A3917 v_add_f32_e32 v23, v17, v8 ; 022E1111 v_sqrt_f32_e32 v23, v23 ; 7E2E4F17 v_mac_f32_e32 v4, v24, v28 ; 2C083918 v_mad_f32 v24, -v23, v21, 1.0 ; D1C10018 23CA2B17 v_mul_f32_e32 v24, v25, v24 ; 0A303119 v_mul_f32_e32 v23, v21, v23 ; 0A2E2F15 v_mac_f32_e32 v24, v26, v23 ; 2C302F1A v_add_f32_e32 v15, v19, v15 ; 021E1F13 image_sample v25, v24, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21918 v_mov_b32_e32 v24, v12 ; 7E30030C v_add_f32_e32 v19, v17, v10 ; 02261511 v_add_f32_e32 v15, v27, v15 ; 021E1F1B v_sqrt_f32_e32 v19, v19 ; 7E264F13 v_mov_b32_e32 v23, v11 ; 7E2E030B v_mov_b32_e32 v24, v22 ; 7E300316 image_sample v[23:24], v[23:24], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41717 v_add_f32_e32 v15, v28, v15 ; 021E1F1C v_mul_f32_e32 v19, v21, v19 ; 0A262715 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v25 ; 2C0A3317 v_mac_f32_e32 v4, v24, v25 ; 2C083318 v_add_f32_e32 v15, v25, v15 ; 021E1F19 v_cmp_gt_f32_e32 vcc, 1.0, v19 ; 7C8826F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_34 ; BF880000 v_sub_f32_e32 v21, 1.0, v19 ; 042A26F2 v_mul_f32_e32 v21, 0x3c000000, v21 ; 0A2A2AFF 3C000000 v_mac_f32_e32 v21, 0x3f7e0000, v19 ; 2C2A26FF 3F7E0000 image_sample v19, v21, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21315 v_mov_b32_e32 v21, 0x40400000 ; 7E2A02FF 40400000 v_mad_f32 v21, s3, v21, v0 ; D1C10015 04022A03 image_sample v[23:24], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41715 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v15, v19, v15 ; 021E1F13 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v19 ; 2C0A2717 v_mac_f32_e32 v4, v24, v19 ; 2C082718 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v17, v17, v12 ; 02221911 v_sqrt_f32_e32 v17, v17 ; 7E224F11 v_mul_f32_e32 v17, 0x3e9e1b5f, v17 ; 0A2222FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v17 ; 7C8822F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E v_sub_f32_e32 v19, 1.0, v17 ; 042622F2 v_mul_f32_e32 v19, 0x3c000000, v19 ; 0A2626FF 3C000000 v_mac_f32_e32 v19, 0x3f7e0000, v17 ; 2C2622FF 3F7E0000 v_mad_f32 v21, 4.0, s3, v0 ; D1C10015 040006F6 image_sample v17, v19, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21113 image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v15, v17, v15 ; 021E1F11 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v17 ; 2C0A2315 v_mac_f32_e32 v4, v22, v17 ; 2C082316 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_sub_f32_e32 v19, 1.0, v3 ; 042606F2 v_mul_f32_e32 v17, v19, v19 ; 0A222713 v_mad_f32 v19, v19, v19, v20 ; D1C10013 04522713 v_sqrt_f32_e32 v19, v19 ; 7E264F13 v_mul_f32_e32 v19, 0x3e9e1b5f, v19 ; 0A2626FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v19 ; 7C8826F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_38 ; BF880000 v_sub_f32_e32 v21, 1.0, v19 ; 042A26F2 v_mul_f32_e32 v21, 0x3c000000, v21 ; 0A2A2AFF 3C000000 v_mac_f32_e32 v21, 0x3f7e0000, v19 ; 2C2A26FF 3F7E0000 image_sample v19, v21, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21315 v_mov_b32_e32 v21, 0xc0400000 ; 7E2A02FF C0400000 v_mad_f32 v21, s3, v21, v0 ; D1C10015 04022A03 v_add_f32_e32 v22, s2, v1 ; 022C0202 image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v15, v19, v15 ; 021E1F13 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v19 ; 2C0A2715 v_mac_f32_e32 v4, v22, v19 ; 2C082716 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v19, v17, v9 ; 02261311 v_sqrt_f32_e32 v19, v19 ; 7E264F13 v_mul_f32_e32 v19, 0x3e9e1b5f, v19 ; 0A2626FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v19 ; 7C8826F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_40 ; BF880000 v_sub_f32_e32 v21, 1.0, v19 ; 042A26F2 v_mul_f32_e32 v21, 0x3c000000, v21 ; 0A2A2AFF 3C000000 v_mac_f32_e32 v21, 0x3f7e0000, v19 ; 2C2A26FF 3F7E0000 image_sample v19, v21, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21315 v_mad_f32 v21, -2.0, s3, v0 ; D1C10015 040006F5 v_add_f32_e32 v22, s2, v1 ; 022C0202 image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v15, v19, v15 ; 021E1F13 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v19 ; 2C0A2715 v_mac_f32_e32 v4, v22, v19 ; 2C082716 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v19, v17, v2 ; 02260511 v_sqrt_f32_e32 v19, v19 ; 7E264F13 v_mov_b32_e32 v21, 0x3e9e1b5f ; 7E2A02FF 3E9E1B5F v_mul_f32_e32 v22, v21, v19 ; 0A2C2715 v_mad_f32 v19, -v19, v21, 1.0 ; D1C10013 23CA2B13 v_bfrev_b32_e32 v25, 60 ; 7E3258BC v_mul_f32_e32 v19, v25, v19 ; 0A262719 v_mov_b32_e32 v26, 0x3f7e0000 ; 7E3402FF 3F7E0000 v_mac_f32_e32 v19, v26, v22 ; 2C262D1A image_sample v19, v19, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21313 s_waitcnt vmcnt(0) ; BF8C0F70 v_mov_b32_e32 v24, v19 ; 7E300313 v_add_f32_e32 v22, s2, v1 ; 022C0202 v_mov_b32_e32 v23, v18 ; 7E2E0312 v_mov_b32_e32 v24, v22 ; 7E300316 image_sample v[23:24], v[23:24], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41717 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v19 ; 2C0A2717 v_add_f32_e32 v23, v17, v6 ; 022E0D11 v_sqrt_f32_e32 v23, v23 ; 7E2E4F17 v_mac_f32_e32 v4, v24, v19 ; 2C082718 v_mul_f32_e32 v24, v21, v23 ; 0A302F15 v_mad_f32 v23, -v23, v21, 1.0 ; D1C10017 23CA2B17 v_mul_f32_e32 v23, v25, v23 ; 0A2E2F19 v_mac_f32_e32 v23, v26, v24 ; 2C2E311A image_sample v27, v23, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21B17 v_mov_b32_e32 v24, v17 ; 7E300311 v_mov_b32_e32 v23, v16 ; 7E2E0310 v_mov_b32_e32 v24, v22 ; 7E300316 image_sample v[23:24], v[23:24], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41717 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v27 ; 2C0A3717 v_add_f32_e32 v23, v17, v7 ; 022E0F11 v_sqrt_f32_e32 v23, v23 ; 7E2E4F17 v_mac_f32_e32 v4, v24, v27 ; 2C083718 v_mul_f32_e32 v24, v21, v23 ; 0A302F15 v_mad_f32 v23, -v23, v21, 1.0 ; D1C10017 23CA2B17 v_mul_f32_e32 v23, v25, v23 ; 0A2E2F19 v_mac_f32_e32 v23, v26, v24 ; 2C2E311A image_sample v28, v23, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21C17 v_mov_b32_e32 v24, v15 ; 7E30030F v_mov_b32_e32 v23, v14 ; 7E2E030E v_mov_b32_e32 v24, v22 ; 7E300316 image_sample v[23:24], v[23:24], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41717 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v28 ; 2C0A3917 v_add_f32_e32 v23, v17, v8 ; 022E1111 v_sqrt_f32_e32 v23, v23 ; 7E2E4F17 v_mac_f32_e32 v4, v24, v28 ; 2C083918 v_mad_f32 v24, -v23, v21, 1.0 ; D1C10018 23CA2B17 v_add_f32_e32 v15, v19, v15 ; 021E1F13 v_mul_f32_e32 v24, v25, v24 ; 0A303119 v_mul_f32_e32 v23, v21, v23 ; 0A2E2F15 v_add_f32_e32 v15, v27, v15 ; 021E1F1B v_mac_f32_e32 v24, v26, v23 ; 2C302F1A image_sample v25, v24, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21918 v_add_f32_e32 v15, v28, v15 ; 021E1F1C v_mov_b32_e32 v24, v12 ; 7E30030C s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v19, v25, v15 ; 02261F19 v_add_f32_e32 v15, v17, v10 ; 021E1511 v_sqrt_f32_e32 v15, v15 ; 7E1E4F0F v_mov_b32_e32 v23, v11 ; 7E2E030B v_mov_b32_e32 v24, v22 ; 7E300316 image_sample v[23:24], v[23:24], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41717 v_mul_f32_e32 v15, v21, v15 ; 0A1E1F15 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v25 ; 2C0A3317 v_mac_f32_e32 v4, v24, v25 ; 2C083318 v_cmp_gt_f32_e32 vcc, 1.0, v15 ; 7C881EF2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_42 ; BF880000 v_sub_f32_e32 v21, 1.0, v15 ; 042A1EF2 v_mul_f32_e32 v21, 0x3c000000, v21 ; 0A2A2AFF 3C000000 v_mac_f32_e32 v21, 0x3f7e0000, v15 ; 2C2A1EFF 3F7E0000 image_sample v15, v21, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20F15 v_mov_b32_e32 v21, 0x40400000 ; 7E2A02FF 40400000 v_mad_f32 v21, s3, v21, v0 ; D1C10015 04022A03 image_sample v[23:24], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41715 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v19, v15, v19 ; 0226270F s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v15 ; 2C0A1F17 v_mac_f32_e32 v4, v24, v15 ; 2C081F18 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v15, v17, v12 ; 021E1911 v_sqrt_f32_e32 v15, v15 ; 7E1E4F0F v_mul_f32_e32 v15, 0x3e9e1b5f, v15 ; 0A1E1EFF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v15 ; 7C881EF2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E v_sub_f32_e32 v17, 1.0, v15 ; 04221EF2 v_mul_f32_e32 v17, 0x3c000000, v17 ; 0A2222FF 3C000000 v_mac_f32_e32 v17, 0x3f7e0000, v15 ; 2C221EFF 3F7E0000 v_mad_f32 v21, 4.0, s3, v0 ; D1C10015 040006F6 image_sample v15, v17, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20F11 image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v19, v15, v19 ; 0226270F s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v15 ; 2C0A1F15 v_mac_f32_e32 v4, v22, v15 ; 2C081F16 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_sub_f32_e32 v17, 2.0, v3 ; 042206F4 v_mac_f32_e32 v20, v17, v17 ; 2C282311 v_mul_f32_e32 v15, v17, v17 ; 0A1E2311 v_sqrt_f32_e32 v17, v20 ; 7E224F14 v_mul_f32_e32 v17, 0x3e9e1b5f, v17 ; 0A2222FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v17 ; 7C8822F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_46 ; BF880000 v_sub_f32_e32 v20, 1.0, v17 ; 042822F2 v_mul_f32_e32 v20, 0x3c000000, v20 ; 0A2828FF 3C000000 v_mac_f32_e32 v20, 0x3f7e0000, v17 ; 2C2822FF 3F7E0000 image_sample v17, v20, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21114 v_mov_b32_e32 v20, 0xc0400000 ; 7E2802FF C0400000 v_mad_f32 v20, s3, v20, v0 ; D1C10014 04022803 v_mad_f32 v21, 2.0, s2, v1 ; D1C10015 040404F4 image_sample v[20:21], v[20:21], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41414 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v19, v17, v19 ; 02262711 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v20, v17 ; 2C0A2314 v_mac_f32_e32 v4, v21, v17 ; 2C082315 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v17, v15, v9 ; 0222130F v_sqrt_f32_e32 v17, v17 ; 7E224F11 v_mul_f32_e32 v17, 0x3e9e1b5f, v17 ; 0A2222FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v17 ; 7C8822F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_48 ; BF880000 v_sub_f32_e32 v20, 1.0, v17 ; 042822F2 v_mul_f32_e32 v20, 0x3c000000, v20 ; 0A2828FF 3C000000 v_mac_f32_e32 v20, 0x3f7e0000, v17 ; 2C2822FF 3F7E0000 image_sample v17, v20, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21114 v_mad_f32 v20, -2.0, s3, v0 ; D1C10014 040006F5 v_mad_f32 v21, 2.0, s2, v1 ; D1C10015 040404F4 image_sample v[20:21], v[20:21], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41414 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v19, v17, v19 ; 02262711 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v20, v17 ; 2C0A2314 v_mac_f32_e32 v4, v21, v17 ; 2C082315 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v17, v15, v2 ; 0222050F v_sqrt_f32_e32 v17, v17 ; 7E224F11 v_mov_b32_e32 v21, 0x3e9e1b5f ; 7E2A02FF 3E9E1B5F v_mul_f32_e32 v20, v21, v17 ; 0A282315 v_mad_f32 v17, -v17, v21, 1.0 ; D1C10011 23CA2B11 v_bfrev_b32_e32 v23, 60 ; 7E2E58BC v_mul_f32_e32 v17, v23, v17 ; 0A222317 v_mov_b32_e32 v24, 0x3f7e0000 ; 7E3002FF 3F7E0000 v_mac_f32_e32 v17, v24, v20 ; 2C222918 image_sample v17, v17, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21111 s_waitcnt vmcnt(0) ; BF8C0F70 v_add_f32_e32 v25, v17, v19 ; 02322711 v_mov_b32_e32 v20, v19 ; 7E280313 v_mad_f32 v22, 2.0, s2, v1 ; D1C10016 040404F4 v_mov_b32_e32 v19, v18 ; 7E260312 v_mov_b32_e32 v20, v22 ; 7E280316 image_sample v[19:20], v[19:20], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41313 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v19, v17 ; 2C0A2313 v_mac_f32_e32 v4, v20, v17 ; 2C082314 v_add_f32_e32 v17, v15, v6 ; 02220D0F v_sqrt_f32_e32 v17, v17 ; 7E224F11 v_mul_f32_e32 v19, v21, v17 ; 0A262315 v_mad_f32 v17, -v17, v21, 1.0 ; D1C10011 23CA2B11 v_mul_f32_e32 v17, v23, v17 ; 0A222317 v_mac_f32_e32 v17, v24, v19 ; 2C222718 image_sample v17, v17, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21111 s_waitcnt vmcnt(0) ; BF8C0F70 v_mov_b32_e32 v20, v17 ; 7E280311 v_mov_b32_e32 v19, v16 ; 7E260310 v_mov_b32_e32 v20, v22 ; 7E280316 image_sample v[19:20], v[19:20], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41313 v_add_f32_e32 v25, v17, v25 ; 02323311 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v19, v17 ; 2C0A2313 v_mac_f32_e32 v4, v20, v17 ; 2C082314 v_add_f32_e32 v17, v15, v7 ; 02220F0F v_sqrt_f32_e32 v17, v17 ; 7E224F11 v_mul_f32_e32 v19, v21, v17 ; 0A262315 v_mad_f32 v17, -v17, v21, 1.0 ; D1C10011 23CA2B11 v_mul_f32_e32 v17, v23, v17 ; 0A222317 v_mac_f32_e32 v17, v24, v19 ; 2C222718 v_mov_b32_e32 v20, v15 ; 7E28030F v_mov_b32_e32 v19, v14 ; 7E26030E v_mov_b32_e32 v20, v22 ; 7E280316 image_sample v17, v17, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21111 image_sample v[19:20], v[19:20], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41313 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v25, v17, v25 ; 02323311 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v19, v17 ; 2C0A2313 v_mac_f32_e32 v4, v20, v17 ; 2C082314 v_add_f32_e32 v17, v15, v8 ; 0222110F v_sqrt_f32_e32 v17, v17 ; 7E224F11 v_mul_f32_e32 v19, v21, v17 ; 0A262315 v_mad_f32 v17, -v17, v21, 1.0 ; D1C10011 23CA2B11 v_mul_f32_e32 v17, v23, v17 ; 0A222317 v_mac_f32_e32 v17, v24, v19 ; 2C222718 v_mov_b32_e32 v24, v12 ; 7E30030C v_mov_b32_e32 v23, v11 ; 7E2E030B v_mov_b32_e32 v24, v22 ; 7E300316 image_sample v17, v17, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21111 image_sample v[23:24], v[23:24], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41717 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v17, v25 ; 02283311 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v17 ; 2C0A2317 v_mac_f32_e32 v4, v24, v17 ; 2C082318 v_add_f32_e32 v17, v15, v10 ; 0222150F v_sqrt_f32_e32 v17, v17 ; 7E224F11 v_mul_f32_e32 v17, v21, v17 ; 0A222315 v_cmp_gt_f32_e32 vcc, 1.0, v17 ; 7C8822F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_50 ; BF880000 v_sub_f32_e32 v19, 1.0, v17 ; 042622F2 v_mul_f32_e32 v19, 0x3c000000, v19 ; 0A2626FF 3C000000 v_mac_f32_e32 v19, 0x3f7e0000, v17 ; 2C2622FF 3F7E0000 image_sample v17, v19, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C21113 v_mov_b32_e32 v19, 0x40400000 ; 7E2602FF 40400000 v_mad_f32 v21, s3, v19, v0 ; D1C10015 04022603 image_sample v[23:24], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41715 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v17, v20 ; 02282911 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v23, v17 ; 2C0A2317 v_mac_f32_e32 v4, v24, v17 ; 2C082318 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v12, v15, v12 ; 0218190F v_sqrt_f32_e32 v12, v12 ; 7E184F0C v_mul_f32_e32 v12, 0x3e9e1b5f, v12 ; 0A1818FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v12 ; 7C8818F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E v_sub_f32_e32 v15, 1.0, v12 ; 041E18F2 v_mul_f32_e32 v15, 0x3c000000, v15 ; 0A1E1EFF 3C000000 v_mac_f32_e32 v15, 0x3f7e0000, v12 ; 2C1E18FF 3F7E0000 v_mad_f32 v21, 4.0, s3, v0 ; D1C10015 040006F6 image_sample v12, v15, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20C0F image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v12, v20 ; 0228290C s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v12 ; 2C0A1915 v_mac_f32_e32 v4, v22, v12 ; 2C081916 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_sub_f32_e32 v15, 0x40400000, v3 ; 041E06FF 40400000 v_mac_f32_e32 v9, v15, v15 ; 2C121F0F v_sqrt_f32_e32 v9, v9 ; 7E124F09 v_mul_f32_e32 v9, 0x3e9e1b5f, v9 ; 0A1212FF 3E9E1B5F v_mul_f32_e32 v12, v15, v15 ; 0A181F0F v_cmp_gt_f32_e32 vcc, 1.0, v9 ; 7C8812F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_54 ; BF880000 v_sub_f32_e32 v15, 1.0, v9 ; 041E12F2 v_mul_f32_e32 v15, 0x3c000000, v15 ; 0A1E1EFF 3C000000 v_mac_f32_e32 v15, 0x3f7e0000, v9 ; 2C1E12FF 3F7E0000 image_sample v9, v15, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C2090F v_mov_b32_e32 v15, 0x40400000 ; 7E1E02FF 40400000 v_mad_f32 v21, -2.0, s3, v0 ; D1C10015 040006F5 v_mad_f32 v22, s2, v15, v1 ; D1C10016 04061E02 image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v9, v20 ; 02282909 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v9 ; 2C0A1315 v_mac_f32_e32 v4, v22, v9 ; 2C081316 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v9, v12, v2 ; 0212050C v_sqrt_f32_e32 v9, v9 ; 7E124F09 v_mul_f32_e32 v9, 0x3e9e1b5f, v9 ; 0A1212FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v9 ; 7C8812F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_56 ; BF880000 v_sub_f32_e32 v15, 1.0, v9 ; 041E12F2 v_mul_f32_e32 v15, 0x3c000000, v15 ; 0A1E1EFF 3C000000 v_mac_f32_e32 v15, 0x3f7e0000, v9 ; 2C1E12FF 3F7E0000 image_sample v9, v15, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C2090F v_mov_b32_e32 v15, 0x40400000 ; 7E1E02FF 40400000 v_mov_b32_e32 v22, v19 ; 7E2C0313 v_mad_f32 v15, s2, v15, v1 ; D1C1000F 04061E02 v_mov_b32_e32 v21, v18 ; 7E2A0312 v_mov_b32_e32 v22, v15 ; 7E2C030F image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v9, v20 ; 02282909 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v9 ; 2C0A1315 v_mac_f32_e32 v4, v22, v9 ; 2C081316 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v9, v12, v6 ; 02120D0C v_sqrt_f32_e32 v9, v9 ; 7E124F09 v_mul_f32_e32 v9, 0x3e9e1b5f, v9 ; 0A1212FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v9 ; 7C8812F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_58 ; BF880000 v_sub_f32_e32 v15, 1.0, v9 ; 041E12F2 v_mul_f32_e32 v15, 0x3c000000, v15 ; 0A1E1EFF 3C000000 v_mac_f32_e32 v15, 0x3f7e0000, v9 ; 2C1E12FF 3F7E0000 image_sample v9, v15, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C2090F v_mov_b32_e32 v15, 0x40400000 ; 7E1E02FF 40400000 v_mov_b32_e32 v22, v17 ; 7E2C0311 v_mad_f32 v15, s2, v15, v1 ; D1C1000F 04061E02 v_mov_b32_e32 v21, v16 ; 7E2A0310 v_mov_b32_e32 v22, v15 ; 7E2C030F image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v9, v20 ; 02282909 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v9 ; 2C0A1315 v_mac_f32_e32 v4, v22, v9 ; 2C081316 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v9, v12, v7 ; 02120F0C v_sqrt_f32_e32 v9, v9 ; 7E124F09 v_mul_f32_e32 v9, 0x3e9e1b5f, v9 ; 0A1212FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v9 ; 7C8812F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_60 ; BF880000 v_sub_f32_e32 v15, 1.0, v9 ; 041E12F2 v_mul_f32_e32 v15, 0x3c000000, v15 ; 0A1E1EFF 3C000000 v_mac_f32_e32 v15, 0x3f7e0000, v9 ; 2C1E12FF 3F7E0000 image_sample v9, v15, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C2090F v_mov_b32_e32 v15, 0x40400000 ; 7E1E02FF 40400000 v_mad_f32 v15, s2, v15, v1 ; D1C1000F 04061E02 v_mov_b32_e32 v22, v15 ; 7E2C030F v_mov_b32_e32 v21, v14 ; 7E2A030E image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v9, v20 ; 02282909 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v9 ; 2C0A1315 v_mac_f32_e32 v4, v22, v9 ; 2C081316 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v9, v12, v8 ; 0212110C v_sqrt_f32_e32 v9, v9 ; 7E124F09 v_mul_f32_e32 v9, 0x3e9e1b5f, v9 ; 0A1212FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v9 ; 7C8812F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_62 ; BF880000 v_sub_f32_e32 v15, 1.0, v9 ; 041E12F2 v_mul_f32_e32 v15, 0x3c000000, v15 ; 0A1E1EFF 3C000000 v_mac_f32_e32 v15, 0x3f7e0000, v9 ; 2C1E12FF 3F7E0000 image_sample v9, v15, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C2090F v_mov_b32_e32 v15, 0x40400000 ; 7E1E02FF 40400000 v_mov_b32_e32 v22, v12 ; 7E2C030C v_mad_f32 v15, s2, v15, v1 ; D1C1000F 04061E02 v_mov_b32_e32 v21, v11 ; 7E2A030B v_mov_b32_e32 v22, v15 ; 7E2C030F image_sample v[21:22], v[21:22], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E41515 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v9, v20 ; 02282909 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v21, v9 ; 2C0A1315 v_mac_f32_e32 v4, v22, v9 ; 2C081316 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v9, v12, v10 ; 0212150C v_sqrt_f32_e32 v9, v9 ; 7E124F09 v_mul_f32_e32 v9, 0x3e9e1b5f, v9 ; 0A1212FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v9 ; 7C8812F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_64 ; BF880000 v_sub_f32_e32 v10, 1.0, v9 ; 041412F2 v_mul_f32_e32 v10, 0x3c000000, v10 ; 0A1414FF 3C000000 v_mac_f32_e32 v10, 0x3f7e0000, v9 ; 2C1412FF 3F7E0000 image_sample v12, v10, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20C0A v_mov_b32_e32 v10, 0x40400000 ; 7E1402FF 40400000 v_mov_b32_e32 v9, v0 ; 7E120300 v_mac_f32_e32 v9, s3, v10 ; 2C121403 v_mad_f32 v10, s2, v10, v1 ; D1C1000A 04061402 image_sample v[9:10], v[9:10], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E40909 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v12, v20 ; 0228290C s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v9, v12 ; 2C0A1909 v_mac_f32_e32 v4, v10, v12 ; 2C08190A s_or_b64 exec, exec, s[4:5] ; 87FE047E v_sub_f32_e32 v3, 4.0, v3 ; 040606F6 v_mac_f32_e32 v2, v3, v3 ; 2C040703 v_sqrt_f32_e32 v2, v2 ; 7E044F02 v_mul_f32_e32 v2, 0x3e9e1b5f, v2 ; 0A0404FF 3E9E1B5F v_mul_f32_e32 v0, v3, v3 ; 0A000703 v_cmp_gt_f32_e32 vcc, 1.0, v2 ; 7C8804F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E v_sub_f32_e32 v3, 1.0, v2 ; 040604F2 v_mul_f32_e32 v3, 0x3c000000, v3 ; 0A0606FF 3C000000 v_mac_f32_e32 v3, 0x3f7e0000, v2 ; 2C0604FF 3F7E0000 v_mad_f32 v19, 4.0, s2, v1 ; D1C10013 040404F6 image_sample v9, v3, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20903 image_sample v[2:3], v[18:19], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E40212 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v9, v20 ; 02282909 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v2, v9 ; 2C0A1302 v_mac_f32_e32 v4, v3, v9 ; 2C081303 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v2, v0, v6 ; 02040D00 v_sqrt_f32_e32 v2, v2 ; 7E044F02 v_mul_f32_e32 v2, 0x3e9e1b5f, v2 ; 0A0404FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v2 ; 7C8804F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E v_sub_f32_e32 v3, 1.0, v2 ; 040604F2 v_mul_f32_e32 v3, 0x3c000000, v3 ; 0A0606FF 3C000000 v_mac_f32_e32 v3, 0x3f7e0000, v2 ; 2C0604FF 3F7E0000 v_mad_f32 v17, 4.0, s2, v1 ; D1C10011 040404F6 image_sample v6, v3, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20603 image_sample v[2:3], v[16:17], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E40210 s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v6, v20 ; 02282906 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v2, v6 ; 2C0A0D02 v_mac_f32_e32 v4, v3, v6 ; 2C080D03 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v2, v0, v7 ; 02040F00 v_sqrt_f32_e32 v2, v2 ; 7E044F02 v_mul_f32_e32 v2, 0x3e9e1b5f, v2 ; 0A0404FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v2 ; 7C8804F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E v_sub_f32_e32 v3, 1.0, v2 ; 040604F2 v_mul_f32_e32 v3, 0x3c000000, v3 ; 0A0606FF 3C000000 v_mac_f32_e32 v3, 0x3f7e0000, v2 ; 2C0604FF 3F7E0000 v_mad_f32 v15, 4.0, s2, v1 ; D1C1000F 040404F6 image_sample v6, v3, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20603 image_sample v[2:3], v[14:15], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E4020E s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v6, v20 ; 02282906 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v2, v6 ; 2C0A0D02 v_mac_f32_e32 v4, v3, v6 ; 2C080D03 s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_f32_e32 v0, v0, v8 ; 02001100 v_sqrt_f32_e32 v0, v0 ; 7E004F00 v_mul_f32_e32 v0, 0x3e9e1b5f, v0 ; 0A0000FF 3E9E1B5F v_cmp_gt_f32_e32 vcc, 1.0, v0 ; 7C8800F2 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_72 ; BF880000 v_sub_f32_e32 v2, 1.0, v0 ; 040400F2 v_mul_f32_e32 v2, 0x3c000000, v2 ; 0A0404FF 3C000000 v_mac_f32_e64 v1, s2, 4.0 ; D1160001 0001EC02 v_mac_f32_e32 v2, 0x3f7e0000, v0 ; 2C0400FF 3F7E0000 v_mov_b32_e32 v12, v1 ; 7E180301 image_sample v2, v2, s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20202 image_sample v[0:1], v[11:12], s[16:23], s[28:31] dmask:0x3 ; F0800300 00E4000B s_waitcnt vmcnt(1) ; BF8C0F71 v_add_f32_e32 v20, v2, v20 ; 02282902 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v5, v0, v2 ; 2C0A0500 v_mac_f32_e32 v4, v1, v2 ; 2C080501 s_or_b64 exec, exec, s[4:5] ; 87FE047E s_and_b64 exec, exec, s[0:1] ; 86FE007E v_rcp_f32_e32 v1, v20 ; 7E024514 v_mov_b32_e32 v2, 0 ; 7E040280 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 v_mul_f32_e32 v0, v1, v5 ; 0A000B01 v_mul_f32_e32 v1, v1, v4 ; 0A020901 Shader epilog disassembly: v_max_f32_e64 v0, v0, v0 clamp ; D10B8000 00020100 v_mov_b32_e32 v4, 0x477fff00 ; 7E0802FF 477FFF00 v_max_f32_e64 v1, v1, v1 clamp ; D10B8001 00020301 v_max_f32_e64 v2, v2, v2 clamp ; D10B8002 00020502 v_max_f32_e64 v3, v3, v3 clamp ; D10B8003 00020703 v_mad_f32 v0, v0, v4, 0.5 ; D1C10000 03C20900 v_mad_f32 v1, v1, v4, 0.5 ; D1C10001 03C20901 v_mad_f32 v2, v2, v4, 0.5 ; D1C10002 03C20902 v_mad_f32 v3, v3, v4, 0.5 ; D1C10003 03C20903 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 v_cvt_u32_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E020EF9 06060501 v_cvt_u32_f32_e32 v2, v2 ; 7E040F02 v_cvt_u32_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E060EF9 06060503 v_or_b32_e32 v0, v0, v1 ; 28000300 v_or_b32_e32 v1, v2, v3 ; 28020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 40 VGPRS: 32 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 6116 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 8 ******************** VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MOV TEMP[0].xy, IN[0].xyxx 2: MOV OUT[0], TEMP[0] 3: MOV OUT[1].xy, IN[1].xyxx 4: MOV OUT[1].zw, IN[2].xxxy 5: END radeonsi: Compiling shader 22 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs void @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 0, !amdgpu.uniform !0 %16 = load <4 x i32>, <4 x i32> addrspace(2)* %15, align 16, !invariant.load !0 %17 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %16, i32 %12, i32 0, i1 false, i1 false) #3 %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 1, !amdgpu.uniform !0 %21 = load <4 x i32>, <4 x i32> addrspace(2)* %20, align 16, !invariant.load !0 %22 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %21, i32 %13, i32 0, i1 false, i1 false) #3 %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 2, !amdgpu.uniform !0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %14, i32 0, i1 false, i1 false) #3 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %23, float %24, float %28, float %29, i1 false, i1 false) #2 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %18, float %19, float 1.000000e+00, float 1.000000e+00, i1 true, i1 false) #2 ret void } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #2 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind } attributes #3 = { nounwind readnone } !0 = !{} FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..3] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 1.0000, 2.4000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[1].x, TEMP[0], SAMP[0], 2D 2: MOV TEMP[2].xy, IN[0].zwww 3: TEX TEMP[3].xy, TEMP[2], SAMP[1], 2D 4: MUL TEMP[4].xyz, CONST[0].xyzz, TEMP[1].xxxx 5: MAD TEMP[5].xyz, CONST[1].xyzz, TEMP[3].xxxx, TEMP[4].xyzz 6: MAD TEMP[6].xyz, CONST[2].xyzz, TEMP[3].yyyy, TEMP[5].xyzz 7: ADD TEMP[7].xyz, TEMP[6].xyzz, CONST[3].xyzz 8: MOV TEMP[7].w, IMM[0].xxxx 9: MOV_SAT TEMP[7].xyz, TEMP[7].xyzz 10: POW TEMP[8].x, TEMP[7].xxxx, IMM[0].yyyy 11: POW TEMP[8].y, TEMP[7].yyyy, IMM[0].yyyy 12: POW TEMP[8].z, TEMP[7].zzzz, IMM[0].yyyy 13: MOV TEMP[7].xyz, TEMP[8].xyzx 14: MOV OUT[0], TEMP[7] 15: END radeonsi: Compiling shader 23 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %6 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #3 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #3 %26 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 0, i32 %4) #3 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %23, i32 1, i32 0, i32 %4) #3 %28 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !invariant.load !0 %30 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 35, !amdgpu.uniform !0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !invariant.load !0 %33 = bitcast float %25 to i32 %34 = bitcast float %27 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = bitcast <2 x i32> %36 to <2 x float> %38 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %37, <8 x i32> %29, <4 x i32> %32, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %39 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 2, i32 0, i32 %4) #3 %40 = call nsz float @llvm.amdgcn.interp.p2(float %39, float %23, i32 2, i32 0, i32 %4) #3 %41 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 3, i32 0, i32 %4) #3 %42 = call nsz float @llvm.amdgcn.interp.p2(float %41, float %23, i32 3, i32 0, i32 %4) #3 %43 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !invariant.load !0 %45 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 39, !amdgpu.uniform !0 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !invariant.load !0 %47 = bitcast float %40 to i32 %48 = bitcast float %42 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = bitcast <2 x i32> %50 to <2 x float> %52 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %51, <8 x i32> %44, <4 x i32> %46, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #3 %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !invariant.load !0 %57 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 0) %58 = fmul nsz float %57, %38 %59 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 4) %60 = fmul nsz float %59, %38 %61 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 8) %62 = fmul nsz float %61, %38 %63 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 16) %64 = fmul nsz float %63, %53 %65 = fadd nsz float %64, %58 %66 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 20) %67 = fmul nsz float %66, %53 %68 = fadd nsz float %67, %60 %69 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 24) %70 = fmul nsz float %69, %53 %71 = fadd nsz float %70, %62 %72 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 32) %73 = fmul nsz float %72, %54 %74 = fadd nsz float %73, %65 %75 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 36) %76 = fmul nsz float %75, %54 %77 = fadd nsz float %76, %68 %78 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 40) %79 = fmul nsz float %78, %54 %80 = fadd nsz float %79, %71 %81 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 48) %82 = fadd nsz float %74, %81 %83 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 52) %84 = fadd nsz float %77, %83 %85 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %56, i32 56) %86 = fadd nsz float %80, %85 %87 = call nsz float @llvm.maxnum.f32(float %82, float 0.000000e+00) #3 %88 = call nsz float @llvm.minnum.f32(float %87, float 1.000000e+00) #3 %89 = call nsz float @llvm.maxnum.f32(float %84, float 0.000000e+00) #3 %90 = call nsz float @llvm.minnum.f32(float %89, float 1.000000e+00) #3 %91 = call nsz float @llvm.maxnum.f32(float %86, float 0.000000e+00) #3 %92 = call nsz float @llvm.minnum.f32(float %91, float 1.000000e+00) #3 %93 = call nsz float @llvm.pow.f32(float %88, float 0x4003333340000000) #3 %94 = call nsz float @llvm.pow.f32(float %90, float 0x4003333340000000) #3 %95 = call nsz float @llvm.pow.f32(float %92, float 0x4003333340000000) #3 %96 = bitcast float %3 to i32 %97 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %96, 6 %98 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %97, float %93, 7 %99 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %98, float %94, 8 %100 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %99, float %95, 9 %101 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %100, float 1.000000e+00, 10 %102 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %101, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %102 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } !0 = !{} radeonsi: Compiling shader 24 Vertex Shader Prolog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> @vs_prolog(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %16 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> undef, i32 %0, 0 %17 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %16, i32 %1, 1 %18 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %17, i32 %2, 2 %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %18, i32 %3, 3 %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %19, i32 %4, 4 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %20, i32 %5, 5 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %21, i32 %6, 6 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %22, i32 %7, 7 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %23, i32 %8, 8 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %24, i32 %9, 9 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %25, i32 %10, 10 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %26, i32 %11, 11 %28 = bitcast i32 %12 to float %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %27, float %28, 12 %30 = bitcast i32 %13 to float %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %29, float %30, 13 %32 = bitcast i32 %14 to float %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %31, float %32, 14 %34 = bitcast i32 %15 to float %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %33, float %34, 15 %36 = add i32 %12, %8 %37 = bitcast i32 %36 to float %38 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %35, float %37, 16 %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %38, float %37, 17 %40 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %39, float %37, 18 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float }> %40 } attributes #0 = { "no-signed-zeros-fp-math"="true" } SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 Shader main disassembly: s_load_dwordx4 s[8:11], s[6:7], 0x10 ; C00A0203 00000010 s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[4:7], s[6:7], 0x20 ; C00A0103 00000020 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v5, s[8:11], 0 idxen ; E00C2000 80020005 buffer_load_format_xyzw v[5:8], v6, s[4:7], 0 idxen ; E00C2000 80010506 s_waitcnt vmcnt(0) ; BF8C0F70 buffer_load_format_xyzw v[7:10], v4, s[0:3], 0 idxen ; E00C2000 80000704 exp param0 v0, v1, v5, v6 ; C400020F 06050100 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v7, v8, v0, v0 done ; C40008CF 00000807 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 100 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x5 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[0:1], exec ; BE80017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 s_load_dwordx4 s[24:27], s[4:5], 0x230 ; C00A0602 00000230 s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p1_f32 v1, v2, attr0.y ; D4040102 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 v_interp_p2_f32 v1, v3, attr0.y ; D4050103 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v4, v[0:1], s[8:15], s[24:27] dmask:0x1 ; F0800100 00C20400 s_load_dwordx8 s[16:23], s[4:5], 0x240 ; C00E0402 00000240 s_load_dwordx4 s[8:11], s[4:5], 0x270 ; C00A0202 00000270 v_interp_p1_f32 v0, v2, attr0.z ; D4000202 v_interp_p1_f32 v1, v2, attr0.w ; D4040302 v_interp_p2_f32 v0, v3, attr0.z ; D4010203 v_interp_p2_f32 v1, v3, attr0.w ; D4050303 s_and_b64 exec, exec, s[0:1] ; 86FE007E s_load_dwordx4 s[0:3], s[2:3], 0x100 ; C00A0001 00000100 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:1], v[0:1], s[16:23], s[8:11] dmask:0x3 ; F0800300 00440000 s_buffer_load_dword s4, s[0:3], 0x0 ; C0220100 00000000 s_buffer_load_dword s5, s[0:3], 0x4 ; C0220140 00000004 s_buffer_load_dword s7, s[0:3], 0x8 ; C02201C0 00000008 s_buffer_load_dword s8, s[0:3], 0x10 ; C0220200 00000010 s_buffer_load_dword s9, s[0:3], 0x14 ; C0220240 00000014 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v2, s4, v4 ; 0A040804 s_buffer_load_dword s4, s[0:3], 0x18 ; C0220100 00000018 v_mul_f32_e32 v3, s5, v4 ; 0A060805 v_mul_f32_e32 v4, s7, v4 ; 0A080807 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v2, s8, v0 ; 2C040008 s_buffer_load_dword s5, s[0:3], 0x20 ; C0220140 00000020 s_buffer_load_dword s7, s[0:3], 0x24 ; C02201C0 00000024 s_buffer_load_dword s8, s[0:3], 0x28 ; C0220200 00000028 v_mac_f32_e32 v3, s9, v0 ; 2C060009 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v4, s4, v0 ; 2C080004 s_buffer_load_dword s9, s[0:3], 0x30 ; C0220240 00000030 s_buffer_load_dword s4, s[0:3], 0x34 ; C0220100 00000034 s_buffer_load_dword s0, s[0:3], 0x38 ; C0220000 00000038 v_mac_f32_e32 v2, s5, v1 ; 2C040205 v_mac_f32_e32 v3, s7, v1 ; 2C060207 v_mac_f32_e32 v4, s8, v1 ; 2C080208 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v0, v2, s9 clamp ; D1018000 00001302 v_add_f32_e64 v1, v3, s4 clamp ; D1018001 00000903 v_add_f32_e64 v2, v4, s0 clamp ; D1018002 00000104 v_log_f32_e32 v0, v0 ; 7E004300 v_log_f32_e32 v1, v1 ; 7E024301 v_log_f32_e32 v2, v2 ; 7E044302 v_mov_b32_e32 v3, 0x4019999a ; 7E0602FF 4019999A v_mul_legacy_f32_e32 v0, v3, v0 ; 08000103 v_mul_legacy_f32_e32 v1, v3, v1 ; 08020303 v_mul_legacy_f32_e32 v2, v3, v2 ; 08040503 v_exp_f32_e32 v0, v0 ; 7E004100 v_exp_f32_e32 v1, v1 ; 7E024101 v_exp_f32_e32 v2, v2 ; 7E044102 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 Shader epilog disassembly: v_max_f32_e64 v0, v0, v0 clamp ; D10B8000 00020100 v_mov_b32_e32 v4, 0x477fff00 ; 7E0802FF 477FFF00 v_max_f32_e64 v1, v1, v1 clamp ; D10B8001 00020301 v_max_f32_e64 v2, v2, v2 clamp ; D10B8002 00020502 v_max_f32_e64 v3, v3, v3 clamp ; D10B8003 00020703 v_mad_f32 v0, v0, v4, 0.5 ; D1C10000 03C20900 v_mad_f32 v1, v1, v4, 0.5 ; D1C10001 03C20901 v_mad_f32 v2, v2, v4, 0.5 ; D1C10002 03C20902 v_mad_f32 v3, v3, v4, 0.5 ; D1C10003 03C20903 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 v_cvt_u32_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E020EF9 06060501 v_cvt_u32_f32_e32 v2, v2 ; 7E040F02 v_cvt_u32_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E060EF9 06060503 v_or_b32_e32 v0, v0, v1 ; 28000300 v_or_b32_e32 v1, v2, v3 ; 28020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 448 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** FRAG DCL IN[0].xy, GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, 0.4167, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[1].xyz, TEMP[0], SAMP[0], 2D 2: MOV TEMP[2].w, IMM[0].xxxx 3: MOV_SAT TEMP[2].xyz, TEMP[1].xyzz 4: MOV TEMP[2].xyz, TEMP[2].xyzx 5: POW TEMP[3].x, TEMP[2].xxxx, IMM[0].yyyy 6: POW TEMP[3].y, TEMP[2].yyyy, IMM[0].yyyy 7: POW TEMP[3].z, TEMP[2].zzzz, IMM[0].yyyy 8: MOV TEMP[2].xyz, TEMP[3].xyzx 9: MOV OUT[0], TEMP[2] 10: END radeonsi: Compiling shader 25 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %6 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #3 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #3 %26 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 0, i32 %4) #3 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %23, i32 1, i32 0, i32 %4) #3 %28 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !invariant.load !0 %30 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 35, !amdgpu.uniform !0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !invariant.load !0 %33 = bitcast float %25 to i32 %34 = bitcast float %27 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = bitcast <2 x i32> %36 to <2 x float> %38 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %37, <8 x i32> %29, <4 x i32> %32, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #3 %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = call nsz float @llvm.maxnum.f32(float %39, float 0.000000e+00) #3 %43 = call nsz float @llvm.minnum.f32(float %42, float 1.000000e+00) #3 %44 = call nsz float @llvm.maxnum.f32(float %40, float 0.000000e+00) #3 %45 = call nsz float @llvm.minnum.f32(float %44, float 1.000000e+00) #3 %46 = call nsz float @llvm.maxnum.f32(float %41, float 0.000000e+00) #3 %47 = call nsz float @llvm.minnum.f32(float %46, float 1.000000e+00) #3 %48 = call nsz float @llvm.pow.f32(float %43, float 0x3FDAAAAAA0000000) #3 %49 = call nsz float @llvm.pow.f32(float %45, float 0x3FDAAAAAA0000000) #3 %50 = call nsz float @llvm.pow.f32(float %47, float 0x3FDAAAAAA0000000) #3 %51 = bitcast float %3 to i32 %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %51, 6 %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52, float %48, 7 %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %49, 8 %55 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54, float %50, 9 %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %55, float 1.000000e+00, 10 %57 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %56, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %57 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.pow.f32(float, float) #1 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } !0 = !{} SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[4:7], s[6:7], 0x10 ; C00A0103 00000010 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v5, s[4:7], 0 idxen ; E00C2000 80010005 s_waitcnt vmcnt(0) ; BF8C0F70 buffer_load_format_xyzw v[2:5], v4, s[0:3], 0 idxen ; E00C2000 80000204 exp param0 v0, v1, v0, v0 ; C400020F 00000100 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v2, v3, v0, v0 done ; C40008CF 00000302 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x5 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[16:17], exec ; BE90017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 s_load_dwordx4 s[0:3], s[4:5], 0x230 ; C00A0002 00000230 s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p1_f32 v1, v2, attr0.y ; D4040102 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 v_interp_p2_f32 v1, v3, attr0.y ; D4050103 s_and_b64 exec, exec, s[16:17] ; 86FE107E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], v[0:1], s[8:15], s[0:3] dmask:0x7 ; F0800700 00020000 s_waitcnt vmcnt(0) ; BF8C0F70 v_max_f32_e64 v0, v0, v0 clamp ; D10B8000 00020100 v_max_f32_e64 v1, v1, v1 clamp ; D10B8001 00020301 v_max_f32_e64 v2, v2, v2 clamp ; D10B8002 00020502 v_log_f32_e32 v0, v0 ; 7E004300 v_log_f32_e32 v1, v1 ; 7E024301 v_log_f32_e32 v2, v2 ; 7E044302 v_mov_b32_e32 v3, 0x3ed55555 ; 7E0602FF 3ED55555 v_mul_legacy_f32_e32 v0, v3, v0 ; 08000103 v_mul_legacy_f32_e32 v1, v3, v1 ; 08020303 v_mul_legacy_f32_e32 v2, v3, v2 ; 08040503 v_exp_f32_e32 v0, v0 ; 7E004100 v_exp_f32_e32 v1, v1 ; 7E024101 v_exp_f32_e32 v2, v2 ; 7E044102 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 Shader epilog disassembly: v_max_f32_e64 v0, v0, v0 clamp ; D10B8000 00020100 v_mov_b32_e32 v4, 0x477fff00 ; 7E0802FF 477FFF00 v_max_f32_e64 v1, v1, v1 clamp ; D10B8001 00020301 v_max_f32_e64 v2, v2, v2 clamp ; D10B8002 00020502 v_max_f32_e64 v3, v3, v3 clamp ; D10B8003 00020703 v_mad_f32 v0, v0, v4, 0.5 ; D1C10000 03C20900 v_mad_f32 v1, v1, v4, 0.5 ; D1C10001 03C20901 v_mad_f32 v2, v2, v4, 0.5 ; D1C10002 03C20902 v_mad_f32 v3, v3, v4, 0.5 ; D1C10003 03C20903 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 v_cvt_u32_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E020EF9 06060501 v_cvt_u32_f32_e32 v2, v2 ; 7E040F02 v_cvt_u32_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E060EF9 06060503 v_or_b32_e32 v0, v0, v1 ; 28000300 v_or_b32_e32 v1, v2, v3 ; 28020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 252 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** FRAG DCL IN[0].xy, GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[1].xy, IN[0].xyyy 2: TEX TEMP[2].xyz, TEMP[1], SAMP[0], 2D 3: MOV TEMP[0].xyz, TEMP[2].xyzx 4: MOV OUT[0], TEMP[0] 5: END radeonsi: Compiling shader 26 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %6 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #3 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #3 %26 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 0, i32 %4) #3 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %23, i32 1, i32 0, i32 %4) #3 %28 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !invariant.load !0 %30 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %30, i64 0, i64 35, !amdgpu.uniform !0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !invariant.load !0 %33 = bitcast float %25 to i32 %34 = bitcast float %27 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = bitcast <2 x i32> %36 to <2 x float> %38 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %37, <8 x i32> %29, <4 x i32> %32, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #3 %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = bitcast float %3 to i32 %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %42, 6 %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43, float %39, 7 %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %40, 8 %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %41, 9 %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float 1.000000e+00, 10 %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } !0 = !{} SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[4:7], s[6:7], 0x10 ; C00A0103 00000010 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v5, s[4:7], 0 idxen ; E00C2000 80010005 s_waitcnt vmcnt(0) ; BF8C0F70 buffer_load_format_xyzw v[2:5], v4, s[0:3], 0 idxen ; E00C2000 80000204 exp param0 v0, v1, v0, v0 ; C400020F 00000100 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v2, v3, v0, v0 done ; C40008CF 00000302 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x5 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[16:17], exec ; BE90017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 s_load_dwordx4 s[0:3], s[4:5], 0x230 ; C00A0002 00000230 s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p1_f32 v1, v2, attr0.y ; D4040102 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 v_interp_p2_f32 v1, v3, attr0.y ; D4050103 s_and_b64 exec, exec, s[16:17] ; 86FE107E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], v[0:1], s[8:15], s[0:3] dmask:0x7 ; F0800700 00020000 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 s_waitcnt vmcnt(0) ; BF8C0F70 Shader epilog disassembly: v_max_f32_e64 v0, v0, v0 clamp ; D10B8000 00020100 v_mov_b32_e32 v4, 0x477fff00 ; 7E0802FF 477FFF00 v_max_f32_e64 v1, v1, v1 clamp ; D10B8001 00020301 v_max_f32_e64 v2, v2, v2 clamp ; D10B8002 00020502 v_max_f32_e64 v3, v3, v3 clamp ; D10B8003 00020703 v_mad_f32 v0, v0, v4, 0.5 ; D1C10000 03C20900 v_mad_f32 v1, v1, v4, 0.5 ; D1C10001 03C20901 v_mad_f32 v2, v2, v4, 0.5 ; D1C10002 03C20902 v_mad_f32 v3, v3, v4, 0.5 ; D1C10003 03C20903 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 v_cvt_u32_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E020EF9 06060501 v_cvt_u32_f32_e32 v2, v2 ; 7E040F02 v_cvt_u32_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E060EF9 06060503 v_or_b32_e32 v0, v0, v1 ; 28000300 v_or_b32_e32 v1, v2, v3 ; 28020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 184 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x5 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v8, attr0.x ; D4000008 v_interp_p1_f32 v1, v8, attr0.y ; D4040108 v_interp_p2_f32 v0, v9, attr0.x ; D4010009 v_interp_p2_f32 v1, v9, attr0.y ; D4050109 s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 s_waitcnt lgkmcnt(0) ; BF8C007F image_load v[0:3], v[0:1], s[8:15] dmask:0xf unorm ; F0001F00 00020000 s_waitcnt vmcnt(0) ; BF8C0F70 Shader epilog disassembly: v_max_f32_e64 v0, v0, v0 clamp ; D10B8000 00020100 v_mov_b32_e32 v4, 0x477fff00 ; 7E0802FF 477FFF00 v_max_f32_e64 v1, v1, v1 clamp ; D10B8001 00020301 v_max_f32_e64 v2, v2, v2 clamp ; D10B8002 00020502 v_max_f32_e64 v3, v3, v3 clamp ; D10B8003 00020703 v_mad_f32 v0, v0, v4, 0.5 ; D1C10000 03C20900 v_mad_f32 v1, v1, v4, 0.5 ; D1C10001 03C20901 v_mad_f32 v2, v2, v4, 0.5 ; D1C10002 03C20902 v_mad_f32 v3, v3, v4, 0.5 ; D1C10003 03C20903 v_cvt_u32_f32_e32 v0, v0 ; 7E000F00 v_cvt_u32_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E020EF9 06060501 v_cvt_u32_f32_e32 v2, v2 ; 7E040F02 v_cvt_u32_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; 7E060EF9 06060503 v_or_b32_e32 v0, v0, v1 ; 28000300 v_or_b32_e32 v1, v2, v3 ; 28020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 168 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** FRAG DCL IN[0].xy, GENERIC[0], PERSPECTIVE DCL SV[0], POSITION DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1..2] DCL CONST[0] DCL TEMP[0] DCL TEMP[1..11], LOCAL IMM[0] FLT32 { 1.0000, 0.0156, 255.0000, 0.0001} IMM[1] FLT32 { 0.0039, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], SV[0] 1: MAD TEMP[0].y, SV[0], CONST[2].xxxx, CONST[2].yyyy 2: MOV TEMP[1].w, IMM[0].xxxx 3: MOV TEMP[2].xy, IN[0].xyyy 4: TEX TEMP[3].xyz, TEMP[2], SAMP[1], 2D 5: MOV TEMP[1].xyz, TEMP[3].xyzx 6: MUL TEMP[4].xy, TEMP[0].xyyy, IMM[0].yyyy 7: MUL TEMP[5].xy, CONST[0].xyyy, TEMP[4].xxxx 8: MAD TEMP[6].xy, CONST[1].xyyy, TEMP[4].yyyy, TEMP[5].xyyy 9: MOV TEMP[7].xy, TEMP[6].xyyy 10: TEX TEMP[8].x, TEMP[7], SAMP[0], 2D 11: MAD TEMP[9], TEMP[1], IMM[0].zzzz, TEMP[8].xxxx 12: ADD TEMP[10], TEMP[9], IMM[0].wwww 13: FLR TEMP[11], TEMP[10] 14: MUL TEMP[1], TEMP[11], IMM[1].xxxx 15: MOV OUT[0], TEMP[1] 16: END radeonsi: Compiling shader 27 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %22 = load <4 x i32>, <4 x i32> addrspace(2)* %21, align 16, !invariant.load !0 %23 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 32) %24 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 36) %25 = fmul nsz float %23, %14 %26 = fadd nsz float %25, %24 %27 = bitcast <2 x i32> %6 to <2 x float> %28 = extractelement <2 x float> %27, i32 0 %29 = extractelement <2 x float> %27, i32 1 %30 = call nsz float @llvm.amdgcn.interp.p1(float %28, i32 0, i32 0, i32 %4) #1 %31 = call nsz float @llvm.amdgcn.interp.p2(float %30, float %29, i32 0, i32 0, i32 %4) #1 %32 = call nsz float @llvm.amdgcn.interp.p1(float %28, i32 1, i32 0, i32 %4) #1 %33 = call nsz float @llvm.amdgcn.interp.p2(float %32, float %29, i32 1, i32 0, i32 %4) #1 %34 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !invariant.load !0 %36 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %37 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %36, i64 0, i64 39, !amdgpu.uniform !0 %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !invariant.load !0 %39 = bitcast float %31 to i32 %40 = bitcast float %33 to i32 %41 = insertelement <2 x i32> undef, i32 %39, i32 0 %42 = insertelement <2 x i32> %41, i32 %40, i32 1 %43 = bitcast <2 x i32> %42 to <2 x float> %44 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %43, <8 x i32> %35, <4 x i32> %38, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = fmul nsz float %13, 1.562500e-02 %49 = fmul nsz float %26, 1.562500e-02 %50 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 0) %51 = fmul nsz float %50, %48 %52 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 4) %53 = fmul nsz float %52, %48 %54 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 16) %55 = fmul nsz float %54, %49 %56 = fadd nsz float %55, %51 %57 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 20) %58 = fmul nsz float %57, %49 %59 = fadd nsz float %58, %53 %60 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !invariant.load !0 %62 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %36, i64 0, i64 35, !amdgpu.uniform !0 %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !invariant.load !0 %64 = bitcast float %56 to i32 %65 = bitcast float %59 to i32 %66 = insertelement <2 x i32> undef, i32 %64, i32 0 %67 = insertelement <2 x i32> %66, i32 %65, i32 1 %68 = bitcast <2 x i32> %67 to <2 x float> %69 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %68, <8 x i32> %61, <4 x i32> %63, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %70 = fmul nsz float %45, 2.550000e+02 %71 = fadd nsz float %70, %69 %72 = fmul nsz float %46, 2.550000e+02 %73 = fadd nsz float %72, %69 %74 = fmul nsz float %47, 2.550000e+02 %75 = fadd nsz float %74, %69 %76 = fadd nsz float %69, 2.550000e+02 %77 = fadd nsz float %71, 0x3F20000000000000 %78 = fadd nsz float %73, 0x3F20000000000000 %79 = fadd nsz float %75, 0x3F20000000000000 %80 = fadd nsz float %76, 0x3F20000000000000 %81 = call nsz float @llvm.floor.f32(float %77) #1 %82 = call nsz float @llvm.floor.f32(float %78) #1 %83 = call nsz float @llvm.floor.f32(float %79) #1 %84 = call nsz float @llvm.floor.f32(float %80) #1 %85 = fmul nsz float %81, 0x3F70101020000000 %86 = fmul nsz float %82, 0x3F70101020000000 %87 = fmul nsz float %83, 0x3F70101020000000 %88 = fmul nsz float %84, 0x3F70101020000000 %89 = bitcast float %3 to i32 %90 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %89, 6 %91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %90, float %85, 7 %92 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91, float %86, 8 %93 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %92, float %87, 9 %94 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %93, float %88, 10 %95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %94, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.floor.f32(float) #2 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone speculatable } attributes #3 = { nounwind readonly } !0 = !{} SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[4:7], s[6:7], 0x10 ; C00A0103 00000010 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v5, s[4:7], 0 idxen ; E00C2000 80010005 s_waitcnt vmcnt(0) ; BF8C0F70 buffer_load_format_xyzw v[2:5], v4, s[0:3], 0 idxen ; E00C2000 80000204 exp param0 v0, v1, v0, v0 ; C400020F 00000100 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v2, v3, v0, v0 done ; C40008CF 00000302 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[20:21], exec ; BE94017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx4 s[0:3], s[2:3], 0x100 ; C00A0001 00000100 s_load_dwordx4 s[16:19], s[4:5], 0x270 ; C00A0402 00000270 s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p1_f32 v1, v2, attr0.y ; D4040102 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x24 ; C0220200 00000024 s_buffer_load_dword s22, s[0:3], 0x0 ; C0220580 00000000 s_buffer_load_dword s23, s[0:3], 0x4 ; C02205C0 00000004 s_buffer_load_dword s24, s[0:3], 0x10 ; C0220600 00000010 s_buffer_load_dword s25, s[0:3], 0x14 ; C0220640 00000014 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s8 ; 7E080208 s_buffer_load_dword s0, s[0:3], 0x20 ; C0220000 00000020 s_load_dwordx8 s[8:15], s[4:5], 0x240 ; C00E0202 00000240 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 v_interp_p2_f32 v1, v3, attr0.y ; D4050103 v_mov_b32_e32 v3, 0x3c800000 ; 7E0602FF 3C800000 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v4, s0, v13 ; 2C081A00 image_sample v[0:2], v[0:1], s[8:15], s[16:19] dmask:0x7 ; F0800700 00820000 s_load_dwordx4 s[0:3], s[4:5], 0x230 ; C00A0002 00000230 s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 v_mul_f32_e32 v5, v3, v12 ; 0A0A1903 v_mul_f32_e32 v6, v3, v4 ; 0A0C0903 v_mul_f32_e32 v3, s22, v5 ; 0A060A16 v_mul_f32_e32 v4, s23, v5 ; 0A080A17 v_mac_f32_e32 v3, s24, v6 ; 2C060C18 v_mac_f32_e32 v4, s25, v6 ; 2C080C19 s_and_b64 exec, exec, s[20:21] ; 86FE147E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v3, v[3:4], s[8:15], s[0:3] dmask:0x1 ; F0800100 00020303 v_mov_b32_e32 v4, 0x437f0000 ; 7E0802FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0F70 v_mad_f32 v0, v4, v0, v3 ; D1C10000 040E0104 v_mad_f32 v1, v4, v1, v3 ; D1C10001 040E0304 v_mad_f32 v2, v4, v2, v3 ; D1C10002 040E0504 v_add_f32_e32 v3, v4, v3 ; 02060704 v_mov_b32_e32 v4, 0x39000000 ; 7E0802FF 39000000 v_add_f32_e32 v0, v4, v0 ; 02000104 v_add_f32_e32 v1, v4, v1 ; 02020304 v_add_f32_e32 v2, v4, v2 ; 02040504 v_add_f32_e32 v3, v4, v3 ; 02060704 v_floor_f32_e32 v0, v0 ; 7E003F00 v_mov_b32_e32 v4, 0x3b808081 ; 7E0802FF 3B808081 v_floor_f32_e32 v1, v1 ; 7E023F01 v_floor_f32_e32 v2, v2 ; 7E043F02 v_floor_f32_e32 v3, v3 ; 7E063F03 v_mul_f32_e32 v0, v4, v0 ; 0A000104 v_mul_f32_e32 v1, v4, v1 ; 0A020304 v_mul_f32_e32 v2, v4, v2 ; 0A040504 v_mul_f32_e32 v3, v4, v3 ; 0A060704 v_mov_b32_e32 v13, v15 ; 7E1A030F Shader epilog disassembly: v_cvt_pkrtz_f16_f32 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32 v1, v2, v3 ; D2960001 00020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd377 SPI_PS_INPUT_ENA = 0x0302 *** SHADER STATS *** SGPRS: 32 VGPRS: 17 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 328 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MOV TEMP[0].xy, IN[0].xyxx 2: MOV OUT[0], TEMP[0] 3: MOV OUT[1].xy, IN[1].xyxx 4: MOV OUT[1].zw, IN[2].xxxy 5: MOV OUT[2].xy, IN[3].xyxx 6: MOV OUT[2].zw, IN[4].xxxy 7: END radeonsi: Compiling shader 28 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs void @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %17 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 0, !amdgpu.uniform !0 %18 = load <4 x i32>, <4 x i32> addrspace(2)* %17, align 16, !invariant.load !0 %19 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %18, i32 %12, i32 0, i1 false, i1 false) #3 %20 = extractelement <4 x float> %19, i32 0 %21 = extractelement <4 x float> %19, i32 1 %22 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 1, !amdgpu.uniform !0 %23 = load <4 x i32>, <4 x i32> addrspace(2)* %22, align 16, !invariant.load !0 %24 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %23, i32 %13, i32 0, i1 false, i1 false) #3 %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 2, !amdgpu.uniform !0 %28 = load <4 x i32>, <4 x i32> addrspace(2)* %27, align 16, !invariant.load !0 %29 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %28, i32 %14, i32 0, i1 false, i1 false) #3 %30 = extractelement <4 x float> %29, i32 0 %31 = extractelement <4 x float> %29, i32 1 %32 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 3, !amdgpu.uniform !0 %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !invariant.load !0 %34 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %33, i32 %15, i32 0, i1 false, i1 false) #3 %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 4, !amdgpu.uniform !0 %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !invariant.load !0 %39 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %38, i32 %16, i32 0, i1 false, i1 false) #3 %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %25, float %26, float %30, float %31, i1 false, i1 false) #2 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %35, float %36, float %40, float %41, i1 false, i1 false) #2 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %20, float %21, float 1.000000e+00, float 1.000000e+00, i1 true, i1 false) #2 ret void } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #2 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind } attributes #3 = { nounwind readnone } !0 = !{} FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL SV[0], POSITION DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[2..3] DCL CONST[0..1] DCL TEMP[0] DCL TEMP[1..21], LOCAL IMM[0] FLT32 { 0.5000, 0.0078, 0.9922, 0.0156} IMM[1] FLT32 { 255.0000, 0.0001, 0.0039, 0.0000} 0: MOV TEMP[0], SV[0] 1: MAD TEMP[0].y, SV[0], CONST[3].xxxx, CONST[3].yyyy 2: MOV TEMP[1].x, IMM[0].xxxx 3: LRP TEMP[2].x, CONST[0].xxxx, IMM[0].zzzz, IMM[0].yyyy 4: MOV TEMP[1].y, TEMP[2].xxxx 5: MOV TEMP[3].xy, TEMP[1].xyyy 6: TEX TEMP[4], TEMP[3], SAMP[0], 2D 7: MOV TEMP[5].xy, IN[0].xyyy 8: TEX TEMP[6], TEMP[5], SAMP[2], 2D 9: MUL TEMP[7], TEMP[4].xxxx, TEMP[6] 10: MOV TEMP[8].xy, IN[0].zwww 11: TEX TEMP[9], TEMP[8], SAMP[3], 2D 12: MAD TEMP[7], TEMP[4].yyyy, TEMP[9], TEMP[7] 13: MOV TEMP[10].xy, IN[1].xyyy 14: TEX TEMP[11], TEMP[10], SAMP[4], 2D 15: MAD TEMP[7], TEMP[4].zzzz, TEMP[11], TEMP[7] 16: MOV TEMP[12].xy, IN[1].zwww 17: TEX TEMP[13], TEMP[12], SAMP[5], 2D 18: MAD TEMP[7], TEMP[4].wwww, TEMP[13], TEMP[7] 19: MUL TEMP[14].xy, TEMP[0].xyyy, IMM[0].wwww 20: MUL TEMP[15].xy, CONST[1].xyyy, TEMP[14].xxxx 21: MAD TEMP[16].xy, CONST[2].xyyy, TEMP[14].yyyy, TEMP[15].xyyy 22: MOV TEMP[17].xy, TEMP[16].xyyy 23: TEX TEMP[18].x, TEMP[17], SAMP[1], 2D 24: MAD TEMP[19], TEMP[7], IMM[1].xxxx, TEMP[18].xxxx 25: ADD TEMP[20], TEMP[19], IMM[1].yyyy 26: FLR TEMP[21], TEMP[20] 27: MUL TEMP[7], TEMP[21], IMM[1].zzzz 28: MOV OUT[0], TEMP[7] 29: END radeonsi: Compiling shader 29 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %22 = load <4 x i32>, <4 x i32> addrspace(2)* %21, align 16, !invariant.load !0 %23 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 48) %24 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 52) %25 = fmul nsz float %23, %14 %26 = fadd nsz float %25, %24 %27 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 0) %28 = fsub nsz float 1.000000e+00, %27 %29 = fmul nsz float %27, 0x3FEFC00000000000 %30 = fmul nsz float %28, 7.812500e-03 %31 = fadd nsz float %29, %30 %32 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %33 = load <8 x i32>, <8 x i32> addrspace(2)* %32, align 32, !invariant.load !0 %34 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %35 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 35, !amdgpu.uniform !0 %36 = load <4 x i32>, <4 x i32> addrspace(2)* %35, align 16, !invariant.load !0 %37 = bitcast float %31 to i32 %38 = insertelement <2 x i32> , i32 %37, i32 1 %39 = bitcast <2 x i32> %38 to <2 x float> %40 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %39, <8 x i32> %33, <4 x i32> %36, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = bitcast <2 x i32> %6 to <2 x float> %46 = extractelement <2 x float> %45, i32 0 %47 = extractelement <2 x float> %45, i32 1 %48 = call nsz float @llvm.amdgcn.interp.p1(float %46, i32 0, i32 0, i32 %4) #1 %49 = call nsz float @llvm.amdgcn.interp.p2(float %48, float %47, i32 0, i32 0, i32 %4) #1 %50 = call nsz float @llvm.amdgcn.interp.p1(float %46, i32 1, i32 0, i32 %4) #1 %51 = call nsz float @llvm.amdgcn.interp.p2(float %50, float %47, i32 1, i32 0, i32 %4) #1 %52 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 20, !amdgpu.uniform !0 %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !invariant.load !0 %54 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 43, !amdgpu.uniform !0 %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !invariant.load !0 %56 = bitcast float %49 to i32 %57 = bitcast float %51 to i32 %58 = insertelement <2 x i32> undef, i32 %56, i32 0 %59 = insertelement <2 x i32> %58, i32 %57, i32 1 %60 = bitcast <2 x i32> %59 to <2 x float> %61 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %60, <8 x i32> %53, <4 x i32> %55, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %62 = extractelement <4 x float> %61, i32 0 %63 = extractelement <4 x float> %61, i32 1 %64 = extractelement <4 x float> %61, i32 2 %65 = extractelement <4 x float> %61, i32 3 %66 = fmul nsz float %41, %62 %67 = fmul nsz float %41, %63 %68 = fmul nsz float %41, %64 %69 = fmul nsz float %41, %65 %70 = call nsz float @llvm.amdgcn.interp.p1(float %46, i32 2, i32 0, i32 %4) #1 %71 = call nsz float @llvm.amdgcn.interp.p2(float %70, float %47, i32 2, i32 0, i32 %4) #1 %72 = call nsz float @llvm.amdgcn.interp.p1(float %46, i32 3, i32 0, i32 %4) #1 %73 = call nsz float @llvm.amdgcn.interp.p2(float %72, float %47, i32 3, i32 0, i32 %4) #1 %74 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 22, !amdgpu.uniform !0 %75 = load <8 x i32>, <8 x i32> addrspace(2)* %74, align 32, !invariant.load !0 %76 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 47, !amdgpu.uniform !0 %77 = load <4 x i32>, <4 x i32> addrspace(2)* %76, align 16, !invariant.load !0 %78 = bitcast float %71 to i32 %79 = bitcast float %73 to i32 %80 = insertelement <2 x i32> undef, i32 %78, i32 0 %81 = insertelement <2 x i32> %80, i32 %79, i32 1 %82 = bitcast <2 x i32> %81 to <2 x float> %83 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %82, <8 x i32> %75, <4 x i32> %77, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = fmul nsz float %42, %84 %89 = fadd nsz float %88, %66 %90 = fmul nsz float %42, %85 %91 = fadd nsz float %90, %67 %92 = fmul nsz float %42, %86 %93 = fadd nsz float %92, %68 %94 = fmul nsz float %42, %87 %95 = fadd nsz float %94, %69 %96 = call nsz float @llvm.amdgcn.interp.p1(float %46, i32 0, i32 1, i32 %4) #1 %97 = call nsz float @llvm.amdgcn.interp.p2(float %96, float %47, i32 0, i32 1, i32 %4) #1 %98 = call nsz float @llvm.amdgcn.interp.p1(float %46, i32 1, i32 1, i32 %4) #1 %99 = call nsz float @llvm.amdgcn.interp.p2(float %98, float %47, i32 1, i32 1, i32 %4) #1 %100 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 24, !amdgpu.uniform !0 %101 = load <8 x i32>, <8 x i32> addrspace(2)* %100, align 32, !invariant.load !0 %102 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 51, !amdgpu.uniform !0 %103 = load <4 x i32>, <4 x i32> addrspace(2)* %102, align 16, !invariant.load !0 %104 = bitcast float %97 to i32 %105 = bitcast float %99 to i32 %106 = insertelement <2 x i32> undef, i32 %104, i32 0 %107 = insertelement <2 x i32> %106, i32 %105, i32 1 %108 = bitcast <2 x i32> %107 to <2 x float> %109 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %108, <8 x i32> %101, <4 x i32> %103, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = extractelement <4 x float> %109, i32 3 %114 = fmul nsz float %43, %110 %115 = fadd nsz float %114, %89 %116 = fmul nsz float %43, %111 %117 = fadd nsz float %116, %91 %118 = fmul nsz float %43, %112 %119 = fadd nsz float %118, %93 %120 = fmul nsz float %43, %113 %121 = fadd nsz float %120, %95 %122 = call nsz float @llvm.amdgcn.interp.p1(float %46, i32 2, i32 1, i32 %4) #1 %123 = call nsz float @llvm.amdgcn.interp.p2(float %122, float %47, i32 2, i32 1, i32 %4) #1 %124 = call nsz float @llvm.amdgcn.interp.p1(float %46, i32 3, i32 1, i32 %4) #1 %125 = call nsz float @llvm.amdgcn.interp.p2(float %124, float %47, i32 3, i32 1, i32 %4) #1 %126 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 26, !amdgpu.uniform !0 %127 = load <8 x i32>, <8 x i32> addrspace(2)* %126, align 32, !invariant.load !0 %128 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 55, !amdgpu.uniform !0 %129 = load <4 x i32>, <4 x i32> addrspace(2)* %128, align 16, !invariant.load !0 %130 = bitcast float %123 to i32 %131 = bitcast float %125 to i32 %132 = insertelement <2 x i32> undef, i32 %130, i32 0 %133 = insertelement <2 x i32> %132, i32 %131, i32 1 %134 = bitcast <2 x i32> %133 to <2 x float> %135 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %134, <8 x i32> %127, <4 x i32> %129, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #1 %136 = extractelement <4 x float> %135, i32 0 %137 = extractelement <4 x float> %135, i32 1 %138 = extractelement <4 x float> %135, i32 2 %139 = extractelement <4 x float> %135, i32 3 %140 = fmul nsz float %44, %136 %141 = fadd nsz float %140, %115 %142 = fmul nsz float %44, %137 %143 = fadd nsz float %142, %117 %144 = fmul nsz float %44, %138 %145 = fadd nsz float %144, %119 %146 = fmul nsz float %44, %139 %147 = fadd nsz float %146, %121 %148 = fmul nsz float %13, 1.562500e-02 %149 = fmul nsz float %26, 1.562500e-02 %150 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 16) %151 = fmul nsz float %150, %148 %152 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 20) %153 = fmul nsz float %152, %148 %154 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 32) %155 = fmul nsz float %154, %149 %156 = fadd nsz float %155, %151 %157 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %22, i32 36) %158 = fmul nsz float %157, %149 %159 = fadd nsz float %158, %153 %160 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 18, !amdgpu.uniform !0 %161 = load <8 x i32>, <8 x i32> addrspace(2)* %160, align 32, !invariant.load !0 %162 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %34, i64 0, i64 39, !amdgpu.uniform !0 %163 = load <4 x i32>, <4 x i32> addrspace(2)* %162, align 16, !invariant.load !0 %164 = bitcast float %156 to i32 %165 = bitcast float %159 to i32 %166 = insertelement <2 x i32> undef, i32 %164, i32 0 %167 = insertelement <2 x i32> %166, i32 %165, i32 1 %168 = bitcast <2 x i32> %167 to <2 x float> %169 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %168, <8 x i32> %161, <4 x i32> %163, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %170 = fmul nsz float %141, 2.550000e+02 %171 = fadd nsz float %170, %169 %172 = fmul nsz float %143, 2.550000e+02 %173 = fadd nsz float %172, %169 %174 = fmul nsz float %145, 2.550000e+02 %175 = fadd nsz float %174, %169 %176 = fmul nsz float %147, 2.550000e+02 %177 = fadd nsz float %176, %169 %178 = fadd nsz float %171, 0x3F20000000000000 %179 = fadd nsz float %173, 0x3F20000000000000 %180 = fadd nsz float %175, 0x3F20000000000000 %181 = fadd nsz float %177, 0x3F20000000000000 %182 = call nsz float @llvm.floor.f32(float %178) #1 %183 = call nsz float @llvm.floor.f32(float %179) #1 %184 = call nsz float @llvm.floor.f32(float %180) #1 %185 = call nsz float @llvm.floor.f32(float %181) #1 %186 = fmul nsz float %182, 0x3F70101020000000 %187 = fmul nsz float %183, 0x3F70101020000000 %188 = fmul nsz float %184, 0x3F70101020000000 %189 = fmul nsz float %185, 0x3F70101020000000 %190 = bitcast float %3 to i32 %191 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %190, 6 %192 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %191, float %186, 7 %193 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %192, float %187, 8 %194 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %193, float %188, 9 %195 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %194, float %189, 10 %196 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %195, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %196 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.floor.f32(float) #3 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone speculatable } !0 = !{} radeonsi: Compiling shader 30 Vertex Shader Prolog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> @vs_prolog(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %16 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> undef, i32 %0, 0 %17 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %16, i32 %1, 1 %18 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %17, i32 %2, 2 %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %18, i32 %3, 3 %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %19, i32 %4, 4 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %20, i32 %5, 5 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %21, i32 %6, 6 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %22, i32 %7, 7 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %23, i32 %8, 8 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %24, i32 %9, 9 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %25, i32 %10, 10 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %26, i32 %11, 11 %28 = bitcast i32 %12 to float %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %27, float %28, 12 %30 = bitcast i32 %13 to float %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %29, float %30, 13 %32 = bitcast i32 %14 to float %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %31, float %32, 14 %34 = bitcast i32 %15 to float %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %33, float %34, 15 %36 = add i32 %12, %8 %37 = bitcast i32 %36 to float %38 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %35, float %37, 16 %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %38, float %37, 17 %40 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %39, float %37, 18 %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %40, float %37, 19 %42 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %41, float %37, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float }> %42 } attributes #0 = { "no-signed-zeros-fp-math"="true" } SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v8, v4 ; 7E100304 Shader main disassembly: s_load_dwordx4 s[12:15], s[6:7], 0x20 ; C00A0303 00000020 s_load_dwordx4 s[8:11], s[6:7], 0x10 ; C00A0203 00000010 s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[16:19], s[6:7], 0x30 ; C00A0403 00000030 s_load_dwordx4 s[4:7], s[6:7], 0x40 ; C00A0103 00000040 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[9:12], v6, s[12:15], 0 idxen ; E00C2000 80030906 buffer_load_format_xyzw v[0:3], v5, s[8:11], 0 idxen ; E00C2000 80020005 s_waitcnt vmcnt(1) ; BF8C0F71 buffer_load_format_xyzw v[11:14], v7, s[16:19], 0 idxen ; E00C2000 80040B07 buffer_load_format_xyzw v[5:8], v8, s[4:7], 0 idxen ; E00C2000 80010508 s_waitcnt vmcnt(1) ; BF8C0F71 buffer_load_format_xyzw v[13:16], v4, s[0:3], 0 idxen ; E00C2000 80000D04 exp param0 v0, v1, v9, v10 ; C400020F 0A090100 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(1) ; BF8C0F71 exp param1 v11, v12, v5, v6 ; C400021F 06050C0B s_waitcnt vmcnt(0) ; BF8C0F70 exp pos0 v13, v14, v0, v0 done ; C40008CF 00000E0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 156 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[0:1], exec ; BE80017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx4 s[8:11], s[2:3], 0x100 ; C00A0201 00000100 s_load_dwordx4 s[24:27], s[4:5], 0x230 ; C00A0602 00000230 s_load_dwordx4 s[36:39], s[4:5], 0x2b0 ; C00A0902 000002B0 s_mov_b32 m0, s7 ; BEFC0007 s_load_dwordx8 s[16:23], s[4:5], 0x240 ; C00E0402 00000240 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s3, s[8:11], 0x34 ; C02200C4 00000034 s_buffer_load_dword s2, s[8:11], 0x30 ; C0220084 00000030 s_buffer_load_dword s40, s[8:11], 0x14 ; C0220A04 00000014 s_buffer_load_dword s41, s[8:11], 0x20 ; C0220A44 00000020 s_buffer_load_dword s42, s[8:11], 0x24 ; C0220A84 00000024 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v14, s3 ; 7E1C0203 v_mac_f32_e32 v14, s2, v13 ; 2C1C1A02 s_buffer_load_dword s2, s[8:11], 0x0 ; C0220084 00000000 s_buffer_load_dword s3, s[8:11], 0x10 ; C02200C4 00000010 s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 s_load_dwordx4 s[32:35], s[4:5], 0x270 ; C00A0802 00000270 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v0, 1.0, s2 ; D1020000 000004F2 v_mul_f32_e32 v1, 0x3c000000, v0 ; 0A0200FF 3C000000 v_mov_b32_e32 v0, 0x3f7e0000 ; 7E0002FF 3F7E0000 v_mac_f32_e32 v1, s2, v0 ; 2C020002 v_mov_b32_e32 v0, 0.5 ; 7E0002F0 image_sample v[4:7], v[0:1], s[8:15], s[24:27] dmask:0xf ; F0800F00 00C20400 s_load_dwordx8 s[8:15], s[4:5], 0x280 ; C00E0202 00000280 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p1_f32 v1, v2, attr0.y ; D4040102 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 v_interp_p2_f32 v1, v3, attr0.y ; D4050103 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[8:11], v[0:1], s[8:15], s[36:39] dmask:0xf ; F0800F00 01220800 s_load_dwordx8 s[24:31], s[4:5], 0x2c0 ; C00E0602 000002C0 s_load_dwordx4 s[36:39], s[4:5], 0x2f0 ; C00A0902 000002F0 v_interp_p1_f32 v0, v2, attr0.z ; D4000202 v_interp_p1_f32 v1, v2, attr0.w ; D4040302 v_interp_p2_f32 v0, v3, attr0.z ; D4010203 v_interp_p2_f32 v1, v3, attr0.w ; D4050303 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v13, v8, v4 ; 0A1A0908 v_mul_f32_e32 v16, v9, v4 ; 0A200909 v_mul_f32_e32 v17, v10, v4 ; 0A22090A v_mul_f32_e32 v4, v11, v4 ; 0A08090B s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[8:11], v[0:1], s[24:31], s[36:39] dmask:0xf ; F0800F00 01260800 s_load_dwordx8 s[8:15], s[4:5], 0x300 ; C00E0202 00000300 s_load_dwordx4 s[36:39], s[4:5], 0x330 ; C00A0902 00000330 v_interp_p1_f32 v0, v2, attr1.x ; D4000402 v_interp_p1_f32 v1, v2, attr1.y ; D4040502 v_interp_p2_f32 v0, v3, attr1.x ; D4010403 v_interp_p2_f32 v1, v3, attr1.y ; D4050503 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v13, v8, v5 ; 2C1A0B08 v_mac_f32_e32 v16, v9, v5 ; 2C200B09 v_mac_f32_e32 v17, v10, v5 ; 2C220B0A v_mac_f32_e32 v4, v11, v5 ; 2C080B0B s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[8:11], v[0:1], s[8:15], s[36:39] dmask:0xf ; F0800F00 01220800 s_load_dwordx8 s[24:31], s[4:5], 0x340 ; C00E0602 00000340 s_load_dwordx4 s[8:11], s[4:5], 0x370 ; C00A0202 00000370 v_interp_p1_f32 v0, v2, attr1.z ; D4000602 v_interp_p1_f32 v1, v2, attr1.w ; D4040702 v_interp_p2_f32 v0, v3, attr1.z ; D4010603 v_interp_p2_f32 v1, v3, attr1.w ; D4050703 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v13, v8, v6 ; 2C1A0D08 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], v[0:1], s[24:31], s[8:11] dmask:0xf ; F0800F00 00460000 v_mac_f32_e32 v16, v9, v6 ; 2C200D09 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v13, v0, v7 ; 2C1A0F00 v_mov_b32_e32 v0, 0x3c800000 ; 7E0002FF 3C800000 v_mac_f32_e32 v17, v10, v6 ; 2C220D0A v_mac_f32_e32 v16, v1, v7 ; 2C200F01 v_mul_f32_e32 v1, v0, v12 ; 0A021900 v_mac_f32_e32 v4, v11, v6 ; 2C080D0B v_mac_f32_e32 v17, v2, v7 ; 2C220F02 v_mul_f32_e32 v2, v0, v14 ; 0A041D00 v_mul_f32_e32 v0, s3, v1 ; 0A000203 v_mul_f32_e32 v1, s40, v1 ; 0A020228 v_mac_f32_e32 v4, v3, v7 ; 2C080F03 v_mac_f32_e32 v0, s41, v2 ; 2C000429 v_mac_f32_e32 v1, s42, v2 ; 2C02042A s_and_b64 exec, exec, s[0:1] ; 86FE007E image_sample v0, v[0:1], s[16:23], s[32:35] dmask:0x1 ; F0800100 01040000 v_mov_b32_e32 v1, 0x437f0000 ; 7E0202FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0F70 v_mad_f32 v2, v1, v13, v0 ; D1C10002 04021B01 v_mad_f32 v3, v1, v16, v0 ; D1C10003 04022101 v_mad_f32 v5, v1, v17, v0 ; D1C10005 04022301 v_mac_f32_e32 v0, v1, v4 ; 2C000901 v_mov_b32_e32 v1, 0x39000000 ; 7E0202FF 39000000 v_add_f32_e32 v4, v1, v5 ; 02080B01 v_add_f32_e32 v2, v1, v2 ; 02040501 v_add_f32_e32 v3, v1, v3 ; 02060701 v_add_f32_e32 v0, v1, v0 ; 02000101 v_floor_f32_e32 v1, v2 ; 7E023F02 v_floor_f32_e32 v2, v3 ; 7E043F03 v_floor_f32_e32 v3, v4 ; 7E063F04 v_floor_f32_e32 v4, v0 ; 7E083F00 v_mov_b32_e32 v5, 0x3b808081 ; 7E0A02FF 3B808081 v_mul_f32_e32 v0, v5, v1 ; 0A000305 v_mul_f32_e32 v1, v5, v2 ; 0A020505 v_mul_f32_e32 v2, v5, v3 ; 0A040705 v_mul_f32_e32 v3, v5, v4 ; 0A060905 v_mov_b32_e32 v13, v15 ; 7E1A030F Shader epilog disassembly: v_cvt_pkrtz_f16_f32 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32 v1, v2, v3 ; D2960001 00020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd377 SPI_PS_INPUT_ENA = 0x0302 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 604 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2].xy, GENERIC[1] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MOV TEMP[0].xy, IN[0].xyxx 2: MOV OUT[0], TEMP[0] 3: MOV OUT[2].xy, IN[1].xyxx 4: MOV OUT[1], IN[2] 5: END radeonsi: Compiling shader 31 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs void @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 0, !amdgpu.uniform !0 %16 = load <4 x i32>, <4 x i32> addrspace(2)* %15, align 16, !invariant.load !0 %17 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %16, i32 %12, i32 0, i1 false, i1 false) #3 %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 1, !amdgpu.uniform !0 %21 = load <4 x i32>, <4 x i32> addrspace(2)* %20, align 16, !invariant.load !0 %22 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %21, i32 %13, i32 0, i1 false, i1 false) #3 %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 2, !amdgpu.uniform !0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %14, i32 0, i1 false, i1 false) #3 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = extractelement <4 x float> %27, i32 3 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %28, float %29, float %30, float %31, i1 false, i1 false) #2 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %23, float %24, float undef, float undef, i1 false, i1 false) #2 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %18, float %19, float 1.000000e+00, float 1.000000e+00, i1 true, i1 false) #2 ret void } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #2 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind } attributes #3 = { nounwind readnone } !0 = !{} FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1].xy, GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..3], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[2].x, TEMP[1], SAMP[0], 2D 3: MUL TEMP[3].x, IN[0].wwww, TEMP[2].xxxx 4: MOV TEMP[0].w, TEMP[3].xxxx 5: MOV OUT[0], TEMP[0] 6: END radeonsi: Compiling shader 32 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %21 = bitcast <2 x i32> %6 to <2 x float> %22 = extractelement <2 x float> %21, i32 0 %23 = extractelement <2 x float> %21, i32 1 %24 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 0, i32 %4) #3 %25 = call nsz float @llvm.amdgcn.interp.p2(float %24, float %23, i32 0, i32 0, i32 %4) #3 %26 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 0, i32 %4) #3 %27 = call nsz float @llvm.amdgcn.interp.p2(float %26, float %23, i32 1, i32 0, i32 %4) #3 %28 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 2, i32 0, i32 %4) #3 %29 = call nsz float @llvm.amdgcn.interp.p2(float %28, float %23, i32 2, i32 0, i32 %4) #3 %30 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 0, i32 1, i32 %4) #3 %31 = call nsz float @llvm.amdgcn.interp.p2(float %30, float %23, i32 0, i32 1, i32 %4) #3 %32 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 1, i32 1, i32 %4) #3 %33 = call nsz float @llvm.amdgcn.interp.p2(float %32, float %23, i32 1, i32 1, i32 %4) #3 %34 = getelementptr [80 x <8 x i32>], [80 x <8 x i32>] addrspace(2)* %2, i64 0, i64 16, !amdgpu.uniform !0 %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !invariant.load !0 %36 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %37 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %36, i64 0, i64 35, !amdgpu.uniform !0 %38 = load <4 x i32>, <4 x i32> addrspace(2)* %37, align 16, !invariant.load !0 %39 = bitcast float %31 to i32 %40 = bitcast float %33 to i32 %41 = insertelement <2 x i32> undef, i32 %39, i32 0 %42 = insertelement <2 x i32> %41, i32 %40, i32 1 %43 = bitcast <2 x i32> %42 to <2 x float> %44 = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %43, <8 x i32> %35, <4 x i32> %38, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false) %45 = call nsz float @llvm.amdgcn.interp.p1(float %22, i32 3, i32 0, i32 %4) #3 %46 = call nsz float @llvm.amdgcn.interp.p2(float %45, float %23, i32 3, i32 0, i32 %4) #3 %47 = fmul nsz float %46, %44 %48 = bitcast float %3 to i32 %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %48, 6 %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49, float %25, 7 %51 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %50, float %27, 8 %52 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %51, float %29, 9 %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %52, float %47, 10 %54 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %53, float %19, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %54 } ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind readonly } attributes #3 = { nounwind readnone } !0 = !{} SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[8:11], s[6:7], 0x10 ; C00A0203 00000010 s_load_dwordx4 s[4:7], s[6:7], 0x20 ; C00A0103 00000020 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[9:12], v4, s[0:3], 0 idxen ; E00C2000 80000904 buffer_load_format_xyzw v[0:3], v5, s[8:11], 0 idxen ; E00C2000 80020005 buffer_load_format_xyzw v[5:8], v6, s[4:7], 0 idxen ; E00C2000 80010506 s_waitcnt vmcnt(0) ; BF8C0F70 exp param0 v5, v6, v7, v8 ; C400020F 08070605 exp param1 v0, v1, v0, v0 ; C400021F 00000100 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp pos0 v9, v10, v0, v0 done ; C40008CF 00000A09 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** radeonsi: Compiling shader 33 Fragment Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { main_body: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %4, float %5, float %6, float %7, i1 true, i1 true) #1 ret void } ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1 attributes #0 = { "InitialPSInputAddr"="16777215" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind } SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x9 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[16:17], exec ; BE90017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 s_load_dwordx4 s[0:3], s[4:5], 0x230 ; C00A0002 00000230 s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p1_f32 v1, v2, attr0.y ; D4040102 v_interp_p1_f32 v4, v2, attr0.z ; D4100202 v_interp_p1_f32 v5, v2, attr1.x ; D4140402 v_interp_p1_f32 v6, v2, attr1.y ; D4180502 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 v_interp_p2_f32 v1, v3, attr0.y ; D4050103 v_interp_p2_f32 v4, v3, attr0.z ; D4110203 v_interp_p2_f32 v5, v3, attr1.x ; D4150403 v_interp_p2_f32 v6, v3, attr1.y ; D4190503 s_and_b64 exec, exec, s[16:17] ; 86FE107E v_interp_p1_f32 v2, v2, attr0.w ; D4080302 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v5, v[5:6], s[8:15], s[0:3] dmask:0x1 ; F0800100 00020505 v_interp_p2_f32 v2, v3, attr0.w ; D4090303 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v3, v5, v2 ; 0A060505 v_mov_b32_e32 v2, v4 ; 7E040304 Shader epilog disassembly: exp mrt0 v0, v1, v2, v3 done vm ; C400180F 03020100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 116 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 mono.u.vs_export_prim_id = 0 opt.kill_outputs[0] = 0x0 opt.kill_outputs[1] = 0x0 opt.clip_disable = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; C00A0003 00000000 s_load_dwordx4 s[8:11], s[6:7], 0x10 ; C00A0203 00000010 s_load_dwordx4 s[4:7], s[6:7], 0x20 ; C00A0103 00000020 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[9:12], v4, s[0:3], 0 idxen ; E00C2000 80000904 buffer_load_format_xyzw v[0:3], v5, s[8:11], 0 idxen ; E00C2000 80020005 buffer_load_format_xyzw v[5:8], v6, s[4:7], 0 idxen ; E00C2000 80010506 s_waitcnt vmcnt(0) ; BF8C0F70 exp param0 v5, v6, v7, v8 ; C400020F 08070605 exp param1 v0, v1, v0, v0 ; C400021F 00000100 s_waitcnt expcnt(0) ; BF8C0F0F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp pos0 v9, v10, v0, v0 done ; C40008CF 00000A09 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x4 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.color_is_int10 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[16:17], exec ; BE90017E s_wqm_b64 exec, exec ; BEFE077E s_load_dwordx8 s[8:15], s[4:5], 0x200 ; C00E0202 00000200 s_load_dwordx4 s[0:3], s[4:5], 0x230 ; C00A0002 00000230 s_mov_b32 m0, s7 ; BEFC0007 v_interp_p1_f32 v0, v2, attr0.x ; D4000002 v_interp_p1_f32 v1, v2, attr0.y ; D4040102 v_interp_p1_f32 v4, v2, attr0.z ; D4100202 v_interp_p1_f32 v5, v2, attr1.x ; D4140402 v_interp_p1_f32 v6, v2, attr1.y ; D4180502 v_interp_p2_f32 v0, v3, attr0.x ; D4010003 v_interp_p2_f32 v1, v3, attr0.y ; D4050103 v_interp_p2_f32 v4, v3, attr0.z ; D4110203 v_interp_p2_f32 v5, v3, attr1.x ; D4150403 v_interp_p2_f32 v6, v3, attr1.y ; D4190503 s_and_b64 exec, exec, s[16:17] ; 86FE107E v_interp_p1_f32 v2, v2, attr0.w ; D4080302 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v5, v[5:6], s[8:15], s[0:3] dmask:0x1 ; F0800100 00020505 v_interp_p2_f32 v2, v3, attr0.w ; D4090303 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v3, v5, v2 ; 0A060505 v_mov_b32_e32 v2, v4 ; 7E040304 Shader epilog disassembly: v_cvt_pkrtz_f16_f32 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32 v1, v2, v3 ; D2960001 00020702 exp mrt0 v0, v0, v1, v1 done compr vm ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 132 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ********************