TGSI: VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], COLOR[1] DCL CONST[0][19..23] DCL CONST[0][24] DCL CONST[0][0..7] DCL CONST[0][16..18] DCL TEMP[0..10] DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 32.0000, 3.0000} IMM[1] FLT32 { 2.0000, 8.0000, 0.0000, 0.0000} 0: MUL TEMP[0], IN[0].xxxx, CONST[0][0] 1: MAD TEMP[0], IN[0].yyyy, CONST[0][1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[0][2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[0][3], TEMP[0] 4: MUL TEMP[0].xyz, IN[0].xxxx, CONST[0][4] 5: MAD TEMP[0].xyz, IN[0].yyyy, CONST[0][5], TEMP[0] 6: MAD TEMP[0].xyz, IN[0].zzzz, CONST[0][6], TEMP[0] 7: MAD TEMP[0].xyz, IN[0].wwww, CONST[0][7], TEMP[0] 8: MUL TEMP[1].xyz, IN[1].xxxx, CONST[0][16] 9: MAD TEMP[1].xyz, IN[1].yyyy, CONST[0][17], TEMP[1] 10: MAD TEMP[1].xyz, IN[1].zzzz, CONST[0][18], TEMP[1] 11: MOV TEMP[6].w, IMM[0].zzzz 12: MOV TEMP[7].xyz, IMM[0].xxxx 13: MOV TEMP[8].xyz, IMM[0].xxxx 14: MOV TEMP[9], IMM[0].xxxx 15: BGNLOOP :68 16: ARL ADDR[0].x, TEMP[6].wwww 17: SNE TEMP[2].x, CONST[0][ADDR[0].x].xxxx, IMM[0].wwww 18: MOV TEMP[4].xyz, -CONST[0][ADDR[0].x+5] 19: MOV TEMP[3].w, IMM[0].yyyy 20: IF TEMP[2].xxxx :30 21: ADD TEMP[4].xyz, CONST[0][ADDR[0].x+4], -TEMP[0] 22: DP3 TEMP[2].x, TEMP[4], TEMP[4] 23: RSQ TEMP[2].y, TEMP[2].xxxx 24: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].yyyy 25: MAD TEMP[3].w, TEMP[2].xxxx, CONST[0][ADDR[0].x].wwww, CONST[0][ADDR[0].x].zzzz 26: MAD TEMP[3].w, TEMP[2].xxxx, TEMP[3].wwww, CONST[0][ADDR[0].x].yyyy 27: RCP TEMP[3].w, TEMP[3].wwww 28: SLT TEMP[2].x, TEMP[2].xxxx, CONST[0][ADDR[0].x+4].wwww 29: MUL TEMP[3].w, TEMP[3].wwww, TEMP[2].xxxx 30: ENDIF 31: DP3 TEMP[10].x, TEMP[4], TEMP[4] 32: RSQ TEMP[10].x, TEMP[10].xxxx 33: MUL TEMP[4].xyz, TEMP[4], TEMP[10].xxxx 34: SEQ TEMP[2].x, CONST[0][ADDR[0].x].xxxx, IMM[1].xxxx 35: IF TEMP[2].xxxx :44 36: DP3 TEMP[2].y, -TEMP[4], CONST[0][ADDR[0].x+5] 37: ADD TEMP[2].x, TEMP[2].yyyy, -CONST[0][ADDR[0].x+6].yyyy 38: MUL TEMP[2].x, TEMP[2].xxxx, CONST[0][ADDR[0].x+6].zzzz 39: POW TEMP[2].x, TEMP[2].xxxx, CONST[0][ADDR[0].x+5].wwww 40: SGE TEMP[2].z, TEMP[2].yyyy, CONST[0][ADDR[0].x+6].xxxx 41: SGE TEMP[2].y, TEMP[2].yyyy, CONST[0][ADDR[0].x+6].yyyy 42: MAD_SAT TEMP[2].x, TEMP[2].xxxx, TEMP[2].yyyy, TEMP[2].zzzz 43: MUL TEMP[3].w, TEMP[3].wwww, TEMP[2].xxxx 44: ENDIF 45: DP3 TEMP[10].x, TEMP[0], TEMP[0] 46: RSQ TEMP[10].x, TEMP[10].xxxx 47: MUL TEMP[5].xyz, TEMP[0], TEMP[10].xxxx 48: ADD TEMP[5].xyz, TEMP[4], -TEMP[5] 49: DP3 TEMP[10].x, TEMP[5], TEMP[5] 50: RSQ TEMP[10].x, TEMP[10].xxxx 51: MUL TEMP[5].xyz, TEMP[5], TEMP[10].xxxx 52: DP3_SAT TEMP[2].x, TEMP[1], TEMP[4] 53: DP3_SAT TEMP[2].y, TEMP[1], TEMP[5] 54: MUL TEMP[2].z, TEMP[2].xxxx, TEMP[2].yyyy 55: IF TEMP[2].zzzz :60 56: DP3_SAT TEMP[2].y, TEMP[1], TEMP[5] 57: POW TEMP[2].y, TEMP[2].yyyy, CONST[0][23].xxxx 58: MUL TEMP[2].y, TEMP[3].wwww, TEMP[2].yyyy 59: MAD TEMP[9], CONST[0][ADDR[0].x+2], TEMP[2].yyyy, TEMP[9] 60: ENDIF 61: MUL TEMP[2].x, TEMP[3].wwww, TEMP[2].xxxx 62: MAD TEMP[7].xyz, CONST[0][ADDR[0].x+1], TEMP[2].xxxx, TEMP[7] 63: MAD TEMP[8].xyz, CONST[0][ADDR[0].x+3], TEMP[3].wwww, TEMP[8] 64: IF CONST[0][ADDR[0].x+7].wwww :66 65: BRK 66: ENDIF 67: ADD TEMP[6].w, TEMP[6].wwww, IMM[1].yyyy 68: ENDLOOP :0 69: MAD TEMP[2].xyz, TEMP[8], CONST[0][21], CONST[0][19] 70: MAD_SAT OUT[1].xyz, TEMP[7], IN[2], TEMP[2] 71: MOV_SAT OUT[1].w, IN[2] 72: MUL_SAT OUT[2], TEMP[9], CONST[0][22] 73: END non-buggy shader: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_vs void @main([12 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32) #0 { main_body: %15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 0, !amdgpu.uniform !0 %16 = load <4 x i32>, <4 x i32> addrspace(2)* %15, align 16, !invariant.load !0 %17 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %16, i32 %12, i32 0, i1 false, i1 false) #2 %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 1, !amdgpu.uniform !0 %23 = load <4 x i32>, <4 x i32> addrspace(2)* %22, align 16, !invariant.load !0 %24 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %23, i32 %13, i32 0, i1 false, i1 false) #2 %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %3, i64 0, i64 2, !amdgpu.uniform !0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !invariant.load !0 %30 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %29, i32 %14, i32 0, i1 false, i1 false) #2 %31 = extractelement <4 x float> %30, i32 0 %32 = extractelement <4 x float> %30, i32 1 %33 = extractelement <4 x float> %30, i32 2 %34 = extractelement <4 x float> %30, i32 3 %35 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %36 = load <4 x i32>, <4 x i32> addrspace(2)* %35, align 16, !invariant.load !0 %37 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 0) %38 = fmul nsz float %18, %37 %39 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 4) %40 = fmul nsz float %18, %39 %41 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 8) %42 = fmul nsz float %18, %41 %43 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 12) %44 = fmul nsz float %18, %43 %45 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 16) %46 = fmul nsz float %19, %45 %47 = fadd nsz float %46, %38 %48 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 20) %49 = fmul nsz float %19, %48 %50 = fadd nsz float %49, %40 %51 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 24) %52 = fmul nsz float %19, %51 %53 = fadd nsz float %52, %42 %54 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 28) %55 = fmul nsz float %19, %54 %56 = fadd nsz float %55, %44 %57 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 32) %58 = fmul nsz float %20, %57 %59 = fadd nsz float %58, %47 %60 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 36) %61 = fmul nsz float %20, %60 %62 = fadd nsz float %61, %50 %63 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 40) %64 = fmul nsz float %20, %63 %65 = fadd nsz float %64, %53 %66 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 44) %67 = fmul nsz float %20, %66 %68 = fadd nsz float %67, %56 %69 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 48) %70 = fmul nsz float %21, %69 %71 = fadd nsz float %70, %59 %72 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 52) %73 = fmul nsz float %21, %72 %74 = fadd nsz float %73, %62 %75 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 56) %76 = fmul nsz float %21, %75 %77 = fadd nsz float %76, %65 %78 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 60) %79 = fmul nsz float %21, %78 %80 = fadd nsz float %79, %68 %81 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 64) %82 = fmul nsz float %18, %81 %83 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 68) %84 = fmul nsz float %18, %83 %85 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 72) %86 = fmul nsz float %18, %85 %87 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 80) %88 = fmul nsz float %19, %87 %89 = fadd nsz float %88, %82 %90 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 84) %91 = fmul nsz float %19, %90 %92 = fadd nsz float %91, %84 %93 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 88) %94 = fmul nsz float %19, %93 %95 = fadd nsz float %94, %86 %96 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 96) %97 = fmul nsz float %20, %96 %98 = fadd nsz float %97, %89 %99 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 100) %100 = fmul nsz float %20, %99 %101 = fadd nsz float %100, %92 %102 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 104) %103 = fmul nsz float %20, %102 %104 = fadd nsz float %103, %95 %105 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 112) %106 = fmul nsz float %21, %105 %107 = fadd nsz float %106, %98 %108 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 116) %109 = fmul nsz float %21, %108 %110 = fadd nsz float %109, %101 %111 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 120) %112 = fmul nsz float %21, %111 %113 = fadd nsz float %112, %104 %114 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 256) %115 = fmul nsz float %25, %114 %116 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 260) %117 = fmul nsz float %25, %116 %118 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 264) %119 = fmul nsz float %25, %118 %120 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 272) %121 = fmul nsz float %26, %120 %122 = fadd nsz float %121, %115 %123 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 276) %124 = fmul nsz float %26, %123 %125 = fadd nsz float %124, %117 %126 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 280) %127 = fmul nsz float %26, %126 %128 = fadd nsz float %127, %119 %129 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 288) %130 = fmul nsz float %27, %129 %131 = fadd nsz float %130, %122 %132 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 292) %133 = fmul nsz float %27, %132 %134 = fadd nsz float %133, %125 %135 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 296) %136 = fmul nsz float %27, %135 %137 = fadd nsz float %136, %128 %138 = fmul nsz float %107, %107 %139 = fmul nsz float %110, %110 %140 = fadd nsz float %139, %138 %141 = fmul nsz float %113, %113 %142 = fadd nsz float %140, %141 %143 = call nsz float @llvm.sqrt.f32(float %142) #2 %144 = fdiv nsz float 1.000000e+00, %143, !fpmath !1 %145 = fmul nsz float %107, %144 %146 = fmul nsz float %110, %144 %147 = fmul nsz float %113, %144 br label %loop15 loop15: ; preds = %endif66, %main_body %TEMP6.w.0 = phi float [ 3.200000e+01, %main_body ], [ %308, %endif66 ] %TEMP7.x.0 = phi float [ 0.000000e+00, %main_body ], [ %284, %endif66 ] %TEMP7.y.0 = phi float [ 0.000000e+00, %main_body ], [ %288, %endif66 ] %TEMP7.z.0 = phi float [ 0.000000e+00, %main_body ], [ %292, %endif66 ] %TEMP8.x.0 = phi float [ 0.000000e+00, %main_body ], [ %296, %endif66 ] %TEMP8.y.0 = phi float [ 0.000000e+00, %main_body ], [ %300, %endif66 ] %TEMP8.z.0 = phi float [ 0.000000e+00, %main_body ], [ %304, %endif66 ] %TEMP9.x.0 = phi float [ 0.000000e+00, %main_body ], [ %TEMP9.x.1, %endif66 ] %TEMP9.y.0 = phi float [ 0.000000e+00, %main_body ], [ %TEMP9.y.1, %endif66 ] %TEMP9.z.0 = phi float [ 0.000000e+00, %main_body ], [ %TEMP9.z.1, %endif66 ] %TEMP9.w.0 = phi float [ 0.000000e+00, %main_body ], [ %TEMP9.w.1, %endif66 ] %148 = call nsz float @llvm.floor.f32(float %TEMP6.w.0) #2 %149 = fptosi float %148 to i32 %150 = shl i32 %149, 4 %151 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %150) %152 = fcmp nsz une float %151, 3.000000e+00 %153 = add i32 %150, 80 %154 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %153) %155 = fsub nsz float -0.000000e+00, %154 %156 = add i32 %150, 84 %157 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %156) %158 = fsub nsz float -0.000000e+00, %157 %159 = add i32 %150, 88 %160 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %159) %161 = fsub nsz float -0.000000e+00, %160 br i1 %152, label %if20, label %endif30 if20: ; preds = %loop15 %162 = add i32 %150, 64 %163 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %162) %164 = fsub nsz float %163, %107 %165 = add i32 %150, 68 %166 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %165) %167 = fsub nsz float %166, %110 %168 = add i32 %150, 72 %169 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %168) %170 = fsub nsz float %169, %113 %171 = fmul nsz float %164, %164 %172 = fmul nsz float %167, %167 %173 = fadd nsz float %172, %171 %174 = fmul nsz float %170, %170 %175 = fadd nsz float %173, %174 %176 = call nsz float @llvm.sqrt.f32(float %175) #2 %177 = fdiv nsz float 1.000000e+00, %176, !fpmath !1 %178 = fmul nsz float %175, %177 %179 = or i32 %150, 12 %180 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %179) %181 = or i32 %150, 8 %182 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %181) %183 = fmul nsz float %178, %180 %184 = fadd nsz float %183, %182 %185 = or i32 %150, 4 %186 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %185) %187 = fmul nsz float %178, %184 %188 = fadd nsz float %187, %186 %189 = fdiv nsz float 1.000000e+00, %188, !fpmath !1 %190 = add i32 %150, 76 %191 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %190) %192 = fcmp nsz olt float %178, %191 %193 = select i1 %192, float 1.000000e+00, float 0.000000e+00 %194 = fmul nsz float %189, %193 br label %endif30 endif30: ; preds = %if20, %loop15 %TEMP3.w.0 = phi float [ %194, %if20 ], [ 1.000000e+00, %loop15 ] %TEMP4.x.0 = phi float [ %164, %if20 ], [ %155, %loop15 ] %TEMP4.y.0 = phi float [ %167, %if20 ], [ %158, %loop15 ] %TEMP4.z.0 = phi float [ %170, %if20 ], [ %161, %loop15 ] %195 = fmul nsz float %TEMP4.x.0, %TEMP4.x.0 %196 = fmul nsz float %TEMP4.y.0, %TEMP4.y.0 %197 = fadd nsz float %196, %195 %198 = fmul nsz float %TEMP4.z.0, %TEMP4.z.0 %199 = fadd nsz float %197, %198 %200 = call nsz float @llvm.sqrt.f32(float %199) #2 %201 = fdiv nsz float 1.000000e+00, %200, !fpmath !1 %202 = fmul nsz float %TEMP4.x.0, %201 %203 = fmul nsz float %TEMP4.y.0, %201 %204 = fmul nsz float %TEMP4.z.0, %201 %205 = fcmp nsz oeq float %151, 2.000000e+00 br i1 %205, label %if35, label %endif44 if35: ; preds = %endif30 %206 = fmul nsz float %202, %154 %207 = fsub nsz float -0.000000e+00, %206 %208 = fmul nsz float %203, %157 %209 = fsub nsz float %207, %208 %210 = fmul nsz float %204, %160 %211 = fsub nsz float %209, %210 %212 = add i32 %150, 100 %213 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %212) %214 = fsub nsz float %211, %213 %215 = add i32 %150, 104 %216 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %215) %217 = fmul nsz float %214, %216 %218 = add i32 %150, 92 %219 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %218) %220 = call nsz float @llvm.pow.f32(float %217, float %219) #2 %221 = add i32 %150, 96 %222 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %221) %223 = fcmp nsz oge float %211, %222 %224 = select i1 %223, float 1.000000e+00, float 0.000000e+00 %225 = fcmp nsz oge float %211, %213 %226 = select i1 %225, float 1.000000e+00, float 0.000000e+00 %227 = fmul nsz float %220, %226 %228 = fadd nsz float %227, %224 %229 = call nsz float @llvm.maxnum.f32(float %228, float 0.000000e+00) #2 %230 = call nsz float @llvm.minnum.f32(float %229, float 1.000000e+00) #2 %231 = fmul nsz float %TEMP3.w.0, %230 br label %endif44 endif44: ; preds = %if35, %endif30 %TEMP3.w.1 = phi float [ %231, %if35 ], [ %TEMP3.w.0, %endif30 ] %232 = fsub nsz float %202, %145 %233 = fsub nsz float %203, %146 %234 = fsub nsz float %204, %147 %235 = fmul nsz float %232, %232 %236 = fmul nsz float %233, %233 %237 = fadd nsz float %236, %235 %238 = fmul nsz float %234, %234 %239 = fadd nsz float %237, %238 %240 = call nsz float @llvm.sqrt.f32(float %239) #2 %241 = fdiv nsz float 1.000000e+00, %240, !fpmath !1 %242 = fmul nsz float %232, %241 %243 = fmul nsz float %233, %241 %244 = fmul nsz float %234, %241 %245 = fmul nsz float %131, %202 %246 = fmul nsz float %134, %203 %247 = fadd nsz float %246, %245 %248 = fmul nsz float %137, %204 %249 = fadd nsz float %247, %248 %250 = call nsz float @llvm.maxnum.f32(float %249, float 0.000000e+00) #2 %251 = call nsz float @llvm.minnum.f32(float %250, float 1.000000e+00) #2 %252 = fmul nsz float %131, %242 %253 = fmul nsz float %134, %243 %254 = fadd nsz float %253, %252 %255 = fmul nsz float %137, %244 %256 = fadd nsz float %254, %255 %257 = call nsz float @llvm.maxnum.f32(float %256, float 0.000000e+00) #2 %258 = call nsz float @llvm.minnum.f32(float %257, float 1.000000e+00) #2 %259 = fmul nsz float %251, %258 %260 = fcmp nsz une float %259, 0.000000e+00 br i1 %260, label %if55, label %endif60 if55: ; preds = %endif44 %261 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 368) %262 = call nsz float @llvm.pow.f32(float %258, float %261) #2 %263 = fmul nsz float %TEMP3.w.1, %262 %264 = add i32 %150, 32 %265 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %264) %266 = fmul nsz float %265, %263 %267 = fadd nsz float %266, %TEMP9.x.0 %268 = add i32 %150, 36 %269 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %268) %270 = fmul nsz float %269, %263 %271 = fadd nsz float %270, %TEMP9.y.0 %272 = add i32 %150, 40 %273 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %272) %274 = fmul nsz float %273, %263 %275 = fadd nsz float %274, %TEMP9.z.0 %276 = add i32 %150, 44 %277 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %276) %278 = fmul nsz float %277, %263 %279 = fadd nsz float %278, %TEMP9.w.0 br label %endif60 endif60: ; preds = %if55, %endif44 %TEMP9.x.1 = phi float [ %267, %if55 ], [ %TEMP9.x.0, %endif44 ] %TEMP9.y.1 = phi float [ %271, %if55 ], [ %TEMP9.y.0, %endif44 ] %TEMP9.z.1 = phi float [ %275, %if55 ], [ %TEMP9.z.0, %endif44 ] %TEMP9.w.1 = phi float [ %279, %if55 ], [ %TEMP9.w.0, %endif44 ] %280 = fmul nsz float %TEMP3.w.1, %251 %281 = add i32 %150, 16 %282 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %281) %283 = fmul nsz float %282, %280 %284 = fadd nsz float %283, %TEMP7.x.0 %285 = add i32 %150, 20 %286 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %285) %287 = fmul nsz float %286, %280 %288 = fadd nsz float %287, %TEMP7.y.0 %289 = add i32 %150, 24 %290 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %289) %291 = fmul nsz float %290, %280 %292 = fadd nsz float %291, %TEMP7.z.0 %293 = add i32 %150, 48 %294 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %293) %295 = fmul nsz float %294, %TEMP3.w.1 %296 = fadd nsz float %295, %TEMP8.x.0 %297 = add i32 %150, 52 %298 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %297) %299 = fmul nsz float %298, %TEMP3.w.1 %300 = fadd nsz float %299, %TEMP8.y.0 %301 = add i32 %150, 56 %302 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %301) %303 = fmul nsz float %302, %TEMP3.w.1 %304 = fadd nsz float %303, %TEMP8.z.0 %305 = add i32 %150, 124 %306 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 %305) %307 = fcmp nsz une float %306, 0.000000e+00 br i1 %307, label %endloop68, label %endif66 endif66: ; preds = %endif60 %308 = fadd nsz float %TEMP6.w.0, 8.000000e+00 br label %loop15 endloop68: ; preds = %endif60 %309 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 336) %310 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 304) %311 = fmul nsz float %296, %309 %312 = fadd nsz float %311, %310 %313 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 340) %314 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 308) %315 = fmul nsz float %300, %313 %316 = fadd nsz float %315, %314 %317 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 344) %318 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 312) %319 = fmul nsz float %304, %317 %320 = fadd nsz float %319, %318 %321 = fmul nsz float %284, %31 %322 = fadd nsz float %321, %312 %323 = fmul nsz float %288, %32 %324 = fadd nsz float %323, %316 %325 = fmul nsz float %292, %33 %326 = fadd nsz float %325, %320 %327 = call nsz float @llvm.maxnum.f32(float %322, float 0.000000e+00) #2 %328 = call nsz float @llvm.minnum.f32(float %327, float 1.000000e+00) #2 %329 = call nsz float @llvm.maxnum.f32(float %324, float 0.000000e+00) #2 %330 = call nsz float @llvm.minnum.f32(float %329, float 1.000000e+00) #2 %331 = call nsz float @llvm.maxnum.f32(float %326, float 0.000000e+00) #2 %332 = call nsz float @llvm.minnum.f32(float %331, float 1.000000e+00) #2 %333 = call nsz float @llvm.maxnum.f32(float %34, float 0.000000e+00) #2 %334 = call nsz float @llvm.minnum.f32(float %333, float 1.000000e+00) #2 %335 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 352) %336 = fmul nsz float %TEMP9.x.1, %335 %337 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 356) %338 = fmul nsz float %TEMP9.y.1, %337 %339 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 360) %340 = fmul nsz float %TEMP9.z.1, %339 %341 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %36, i32 364) %342 = fmul nsz float %TEMP9.w.1, %341 %343 = call nsz float @llvm.maxnum.f32(float %336, float 0.000000e+00) #2 %344 = call nsz float @llvm.minnum.f32(float %343, float 1.000000e+00) #2 %345 = call nsz float @llvm.maxnum.f32(float %338, float 0.000000e+00) #2 %346 = call nsz float @llvm.minnum.f32(float %345, float 1.000000e+00) #2 %347 = call nsz float @llvm.maxnum.f32(float %340, float 0.000000e+00) #2 %348 = call nsz float @llvm.minnum.f32(float %347, float 1.000000e+00) #2 %349 = call nsz float @llvm.maxnum.f32(float %342, float 0.000000e+00) #2 %350 = call nsz float @llvm.minnum.f32(float %349, float 1.000000e+00) #2 %351 = and i32 %7, 1 %352 = icmp eq i32 %351, 0 br i1 %352, label %endif-block, label %if-true-block if-true-block: ; preds = %endloop68 %353 = call nsz float @llvm.maxnum.f32(float %328, float 0.000000e+00) #2 %354 = call nsz float @llvm.minnum.f32(float %353, float 1.000000e+00) #2 %355 = call nsz float @llvm.maxnum.f32(float %330, float 0.000000e+00) #2 %356 = call nsz float @llvm.minnum.f32(float %355, float 1.000000e+00) #2 %357 = call nsz float @llvm.maxnum.f32(float %332, float 0.000000e+00) #2 %358 = call nsz float @llvm.minnum.f32(float %357, float 1.000000e+00) #2 %359 = call nsz float @llvm.maxnum.f32(float %334, float 0.000000e+00) #2 %360 = call nsz float @llvm.minnum.f32(float %359, float 1.000000e+00) #2 %361 = call nsz float @llvm.maxnum.f32(float %344, float 0.000000e+00) #2 %362 = call nsz float @llvm.minnum.f32(float %361, float 1.000000e+00) #2 %363 = call nsz float @llvm.maxnum.f32(float %346, float 0.000000e+00) #2 %364 = call nsz float @llvm.minnum.f32(float %363, float 1.000000e+00) #2 %365 = call nsz float @llvm.maxnum.f32(float %348, float 0.000000e+00) #2 %366 = call nsz float @llvm.minnum.f32(float %365, float 1.000000e+00) #2 %367 = call nsz float @llvm.maxnum.f32(float %350, float 0.000000e+00) #2 %368 = call nsz float @llvm.minnum.f32(float %367, float 1.000000e+00) #2 br label %endif-block endif-block: ; preds = %endloop68, %if-true-block %OUT2.w.0 = phi float [ %368, %if-true-block ], [ %350, %endloop68 ] %OUT2.z.0 = phi float [ %366, %if-true-block ], [ %348, %endloop68 ] %OUT2.y.0 = phi float [ %364, %if-true-block ], [ %346, %endloop68 ] %OUT2.x.0 = phi float [ %362, %if-true-block ], [ %344, %endloop68 ] %OUT1.w.0 = phi float [ %360, %if-true-block ], [ %334, %endloop68 ] %OUT1.z.0 = phi float [ %358, %if-true-block ], [ %332, %endloop68 ] %OUT1.y.0 = phi float [ %356, %if-true-block ], [ %330, %endloop68 ] %OUT1.x.0 = phi float [ %354, %if-true-block ], [ %328, %endloop68 ] call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %71, float %74, float %77, float %80, i1 true, i1 false) #4 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %OUT1.x.0, float %OUT1.y.0, float %OUT1.z.0, float %OUT1.w.0, i1 false, i1 false) #4 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %OUT2.x.0, float %OUT2.y.0, float %OUT2.z.0, float %OUT2.w.0, i1 false, i1 false) #4 ret void } ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.floor.f32(float) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.sqrt.f32(float) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.minnum.f32(float, float) #3 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #4 attributes #0 = { "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } attributes #3 = { nounwind readnone speculatable } attributes #4 = { nounwind } !0 = !{} !1 = !{float 2.500000e+00} Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mov_b32_e32 v6, v4 ; 7E0C0304 v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[12:15], s[6:7], 0x0 ; C00A0303 00000000 s_load_dwordx4 s[0:3], s[2:3], 0x100 ; C00A0001 00000100 v_mov_b32_e32 v20, 0 ; 7E280280 v_mov_b32_e32 v19, 0 ; 7E260280 v_mov_b32_e32 v18, 0 ; 7E240280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[25:28], v4, s[12:15], 0 idxen ; E00C2000 80031904 s_load_dwordx4 s[12:15], s[6:7], 0x10 ; C00A0303 00000010 s_load_dwordx4 s[4:7], s[6:7], 0x20 ; C00A0103 00000020 s_buffer_load_dword s8, s[0:3], 0x0 ; C0220200 00000000 s_buffer_load_dword s9, s[0:3], 0x4 ; C0220240 00000004 s_buffer_load_dword s10, s[0:3], 0x8 ; C0220280 00000008 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[29:32], v5, s[12:15], 0 idxen ; E00C2000 80031D05 buffer_load_format_xyzw v[4:7], v6, s[4:7], 0 idxen ; E00C2000 80010406 s_buffer_load_dword s4, s[0:3], 0x10 ; C0220100 00000010 s_buffer_load_dword s5, s[0:3], 0x14 ; C0220140 00000014 s_buffer_load_dword s6, s[0:3], 0x18 ; C0220180 00000018 s_buffer_load_dword s13, s[0:3], 0x40 ; C0220340 00000040 s_buffer_load_dword s14, s[0:3], 0x44 ; C0220380 00000044 s_buffer_load_dword s15, s[0:3], 0x48 ; C02203C0 00000048 s_buffer_load_dword s12, s[0:3], 0xc ; C0220300 0000000C s_buffer_load_dword s7, s[0:3], 0x1c ; C02201C0 0000001C v_mov_b32_e32 v23, 0 ; 7E2E0280 v_mov_b32_e32 v22, 0 ; 7E2C0280 v_mov_b32_e32 v21, 0 ; 7E2A0280 v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v16, 0 ; 7E200280 v_mov_b32_e32 v15, 0 ; 7E1E0280 s_waitcnt vmcnt(2) ; BF8C0F72 v_mul_f32_e32 v0, s8, v25 ; 0A003208 v_mul_f32_e32 v1, s9, v25 ; 0A023209 v_mul_f32_e32 v2, s10, v25 ; 0A04320A s_buffer_load_dword s8, s[0:3], 0x50 ; C0220200 00000050 s_buffer_load_dword s9, s[0:3], 0x54 ; C0220240 00000054 s_buffer_load_dword s10, s[0:3], 0x58 ; C0220280 00000058 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s4, v26 ; 2C003404 v_mac_f32_e32 v1, s5, v26 ; 2C023405 v_mac_f32_e32 v2, s6, v26 ; 2C043406 s_buffer_load_dword s4, s[0:3], 0x20 ; C0220100 00000020 s_buffer_load_dword s5, s[0:3], 0x24 ; C0220140 00000024 s_buffer_load_dword s6, s[0:3], 0x28 ; C0220180 00000028 v_mul_f32_e32 v8, s13, v25 ; 0A10320D v_mul_f32_e32 v9, s14, v25 ; 0A12320E v_mul_f32_e32 v10, s15, v25 ; 0A14320F v_mac_f32_e32 v8, s8, v26 ; 2C103408 v_mac_f32_e32 v9, s9, v26 ; 2C123409 v_mac_f32_e32 v10, s10, v26 ; 2C14340A s_buffer_load_dword s8, s[0:3], 0x60 ; C0220200 00000060 s_buffer_load_dword s9, s[0:3], 0x64 ; C0220240 00000064 s_buffer_load_dword s10, s[0:3], 0x68 ; C0220280 00000068 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s4, v27 ; 2C003604 v_mac_f32_e32 v1, s5, v27 ; 2C023605 v_mac_f32_e32 v2, s6, v27 ; 2C043606 s_buffer_load_dword s4, s[0:3], 0x70 ; C0220100 00000070 s_buffer_load_dword s5, s[0:3], 0x74 ; C0220140 00000074 s_buffer_load_dword s6, s[0:3], 0x78 ; C0220180 00000078 v_mac_f32_e32 v8, s8, v27 ; 2C103608 v_mac_f32_e32 v9, s9, v27 ; 2C123609 v_mac_f32_e32 v10, s10, v27 ; 2C14360A s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v8, s4, v28 ; 2C103804 v_mac_f32_e32 v9, s5, v28 ; 2C123805 v_mac_f32_e32 v10, s6, v28 ; 2C143806 s_buffer_load_dword s5, s[0:3], 0x104 ; C0220140 00000104 s_buffer_load_dword s6, s[0:3], 0x108 ; C0220180 00000108 s_buffer_load_dword s4, s[0:3], 0x100 ; C0220100 00000100 v_mul_f32_e32 v11, v8, v8 ; 0A161108 v_mac_f32_e32 v11, v9, v9 ; 2C161309 v_mul_f32_e32 v3, s12, v25 ; 0A06320C v_mac_f32_e32 v11, v10, v10 ; 2C16150A v_rsq_f32_e32 v14, v11 ; 7E1C490B v_mac_f32_e32 v3, s7, v26 ; 2C063407 s_buffer_load_dword s7, s[0:3], 0x2c ; C02201C0 0000002C s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v11, s4, v29 ; 0A163A04 v_mul_f32_e32 v12, s5, v29 ; 0A183A05 v_mul_f32_e32 v13, s6, v29 ; 0A1A3A06 s_buffer_load_dword s4, s[0:3], 0x110 ; C0220100 00000110 s_buffer_load_dword s5, s[0:3], 0x114 ; C0220140 00000114 s_buffer_load_dword s6, s[0:3], 0x118 ; C0220180 00000118 v_mac_f32_e32 v3, s7, v27 ; 2C063607 s_buffer_load_dword s7, s[0:3], 0x3c ; C02201C0 0000003C s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v11, s4, v30 ; 2C163C04 v_mac_f32_e32 v12, s5, v30 ; 2C183C05 v_mac_f32_e32 v13, s6, v30 ; 2C1A3C06 s_buffer_load_dword s4, s[0:3], 0x30 ; C0220100 00000030 s_buffer_load_dword s5, s[0:3], 0x34 ; C0220140 00000034 s_buffer_load_dword s6, s[0:3], 0x38 ; C0220180 00000038 s_buffer_load_dword s8, s[0:3], 0x120 ; C0220200 00000120 s_buffer_load_dword s9, s[0:3], 0x124 ; C0220240 00000124 s_buffer_load_dword s10, s[0:3], 0x128 ; C0220280 00000128 v_mul_f32_e32 v24, v14, v8 ; 0A30110E v_mul_f32_e32 v25, v14, v9 ; 0A32130E v_mul_f32_e32 v26, v14, v10 ; 0A34150E v_mov_b32_e32 v27, 0x42000000 ; 7E3602FF 42000000 v_mov_b32_e32 v14, 0 ; 7E1C0280 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s4, v28 ; 2C003804 v_mac_f32_e32 v1, s5, v28 ; 2C023805 v_mac_f32_e32 v2, s6, v28 ; 2C043806 v_mac_f32_e32 v3, s7, v28 ; 2C063807 v_mac_f32_e32 v11, s8, v31 ; 2C163E08 v_mac_f32_e32 v12, s9, v31 ; 2C183E09 v_mac_f32_e32 v13, s10, v31 ; 2C1A3E0A v_floor_f32_e32 v28, v27 ; 7E383F1B v_cvt_i32_f32_e32 v28, v28 ; 7E38111C v_lshlrev_b32_e32 v28, 4, v28 ; 24383884 v_add_i32_e32 v29, vcc, 0x58, v28 ; 323A38FF 00000058 buffer_load_dword v30, v29, s[0:3], 0 offen ; E0501000 80001E1D v_add_i32_e32 v29, vcc, 0x54, v28 ; 323A38FF 00000054 buffer_load_dword v31, v29, s[0:3], 0 offen ; E0501000 80001F1D v_add_i32_e32 v29, vcc, 0x50, v28 ; 323A38FF 00000050 buffer_load_dword v33, v28, s[0:3], 0 offen ; E0501000 8000211C buffer_load_dword v32, v29, s[0:3], 0 offen ; E0501000 8000201D v_mov_b32_e32 v29, 0x40400000 ; 7E3A02FF 40400000 s_waitcnt vmcnt(1) ; BF8C0F71 v_cmp_eq_f32_e32 vcc, v33, v29 ; 7C843B21 v_bfrev_b32_e32 v29, 1 ; 7E3A5881 s_waitcnt vmcnt(0) ; BF8C0F70 v_xor_b32_e32 v34, v32, v29 ; 2A443B20 v_xor_b32_e32 v35, v31, v29 ; 2A463B1F v_xor_b32_e32 v36, v30, v29 ; 2A483B1E v_mov_b32_e32 v29, 1.0 ; 7E3A02F2 s_cbranch_vccnz BB0_3 ; BF870000 v_add_i32_e32 v29, vcc, 64, v28 ; 323A38C0 v_add_i32_e32 v34, vcc, 0x44, v28 ; 324438FF 00000044 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0501000 80001D1D buffer_load_dword v35, v34, s[0:3], 0 offen ; E0501000 80002322 v_add_i32_e32 v34, vcc, 0x48, v28 ; 324438FF 00000048 buffer_load_dword v36, v34, s[0:3], 0 offen ; E0501000 80002422 v_or_b32_e32 v34, 12, v28 ; 2844388C buffer_load_dword v37, v34, s[0:3], 0 offen ; E0501000 80002522 v_or_b32_e32 v34, 8, v28 ; 28443888 buffer_load_dword v38, v34, s[0:3], 0 offen ; E0501000 80002622 v_or_b32_e32 v34, 4, v28 ; 28443884 buffer_load_dword v39, v34, s[0:3], 0 offen ; E0501000 80002722 v_add_i32_e32 v34, vcc, 0x4c, v28 ; 324438FF 0000004C buffer_load_dword v40, v34, s[0:3], 0 offen ; E0501000 80002822 s_waitcnt vmcnt(6) ; BF8C0F76 v_subrev_f32_e32 v34, v8, v29 ; 06443B08 v_mul_f32_e32 v29, v34, v34 ; 0A3A4522 s_waitcnt vmcnt(5) ; BF8C0F75 v_subrev_f32_e32 v35, v9, v35 ; 06464709 v_mac_f32_e32 v29, v35, v35 ; 2C3A4723 s_waitcnt vmcnt(4) ; BF8C0F74 v_subrev_f32_e32 v36, v10, v36 ; 0648490A v_mac_f32_e32 v29, v36, v36 ; 2C3A4924 v_rsq_f32_e32 v41, v29 ; 7E52491D v_mul_f32_e32 v29, v41, v29 ; 0A3A3B29 s_waitcnt vmcnt(2) ; BF8C0F72 v_mac_f32_e32 v38, v37, v29 ; 2C4C3B25 s_waitcnt vmcnt(1) ; BF8C0F71 v_mac_f32_e32 v39, v38, v29 ; 2C4E3B26 v_rcp_f32_e32 v37, v39 ; 7E4A4527 s_waitcnt vmcnt(0) ; BF8C0F70 v_cmp_lt_f32_e32 vcc, v29, v40 ; 7C82511D v_cndmask_b32_e64 v29, 0, 1.0, vcc ; D100001D 01A9E480 v_mul_f32_e32 v29, v29, v37 ; 0A3A4B1D v_mul_f32_e32 v37, v34, v34 ; 0A4A4522 v_mac_f32_e32 v37, v35, v35 ; 2C4A4723 v_mac_f32_e32 v37, v36, v36 ; 2C4A4924 v_rsq_f32_e32 v37, v37 ; 7E4A4925 v_cmp_neq_f32_e32 vcc, 2.0, v33 ; 7C9A42F4 v_mul_f32_e32 v33, v37, v36 ; 0A424925 v_mul_f32_e32 v36, v37, v34 ; 0A484525 v_mul_f32_e32 v34, v37, v35 ; 0A444725 s_cbranch_vccnz BB0_5 ; BF870000 v_add_i32_e32 v35, vcc, 0x64, v28 ; 324638FF 00000064 v_add_i32_e32 v37, vcc, 0x68, v28 ; 324A38FF 00000068 buffer_load_dword v35, v35, s[0:3], 0 offen ; E0501000 80002323 buffer_load_dword v37, v37, s[0:3], 0 offen ; E0501000 80002525 v_add_i32_e32 v38, vcc, 0x5c, v28 ; 324C38FF 0000005C v_add_i32_e32 v39, vcc, 0x60, v28 ; 324E38FF 00000060 buffer_load_dword v38, v38, s[0:3], 0 offen ; E0501000 80002626 buffer_load_dword v39, v39, s[0:3], 0 offen ; E0501000 80002727 v_mul_f32_e32 v31, v31, v34 ; 0A3E451F v_mad_f32 v31, v36, -v32, -v31 ; D1C1001F C47E4124 v_mad_f32 v30, -v33, v30, v31 ; D1C1001E 247E3D21 s_waitcnt vmcnt(3) ; BF8C0F73 v_subrev_f32_e32 v31, v35, v30 ; 063E3D23 s_waitcnt vmcnt(2) ; BF8C0F72 v_mul_f32_e32 v31, v37, v31 ; 0A3E3F25 v_log_f32_e32 v31, v31 ; 7E3E431F v_cmp_ge_f32_e32 vcc, v30, v35 ; 7C8C471E v_cndmask_b32_e64 v32, 0, 1.0, vcc ; D1000020 01A9E480 s_waitcnt vmcnt(0) ; BF8C0F70 v_cmp_ge_f32_e32 vcc, v30, v39 ; 7C8C4F1E v_mul_legacy_f32_e32 v31, v38, v31 ; 083E3F26 v_exp_f32_e32 v31, v31 ; 7E3E411F v_cndmask_b32_e64 v30, 0, 1.0, vcc ; D100001E 01A9E480 v_mac_f32_e64 v30, v31, v32 clamp ; D116801E 0002411F v_mul_f32_e32 v29, v30, v29 ; 0A3A3B1E v_mul_f32_e32 v30, v36, v11 ; 0A3C1724 v_subrev_f32_e32 v31, v24, v36 ; 063E4918 v_mac_f32_e32 v30, v34, v12 ; 2C3C1922 v_subrev_f32_e32 v32, v25, v34 ; 06404519 v_mul_f32_e32 v34, v31, v31 ; 0A443F1F v_mac_f32_e64 v30, v13, v33 clamp ; D116801E 0002430D v_subrev_f32_e32 v33, v26, v33 ; 0642431A v_mac_f32_e32 v34, v32, v32 ; 2C444120 v_mac_f32_e32 v34, v33, v33 ; 2C444321 v_rsq_f32_e32 v34, v34 ; 7E444922 v_mul_f32_e32 v31, v34, v31 ; 0A3E3F22 v_mul_f32_e32 v32, v34, v32 ; 0A404122 v_mul_f32_e32 v31, v31, v11 ; 0A3E171F v_mul_f32_e32 v33, v34, v33 ; 0A424322 v_mac_f32_e32 v31, v32, v12 ; 2C3E1920 v_mac_f32_e64 v31, v13, v33 clamp ; D116801F 0002430D v_mul_f32_e32 v32, v31, v30 ; 0A403D1F v_cmp_neq_f32_e32 vcc, 0, v32 ; 7C9A4080 s_and_saveexec_b64 s[4:5], vcc ; BE84206A s_xor_b64 s[4:5], exec, s[4:5] ; 8884047E s_cbranch_execz BB0_7 ; BF880000 v_add_i32_e32 v32, vcc, 32, v28 ; 324038A0 v_add_i32_e32 v33, vcc, 36, v28 ; 324238A4 v_add_i32_e32 v34, vcc, 40, v28 ; 324438A8 v_add_i32_e32 v35, vcc, 44, v28 ; 324638AC buffer_load_dword v32, v32, s[0:3], 0 offen ; E0501000 80002020 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0501000 80002121 buffer_load_dword v34, v34, s[0:3], 0 offen ; E0501000 80002222 buffer_load_dword v35, v35, s[0:3], 0 offen ; E0501000 80002323 s_buffer_load_dword s6, s[0:3], 0x170 ; C0220180 00000170 v_log_f32_e32 v31, v31 ; 7E3E431F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v31, s6, v31 ; 083E3E06 v_exp_f32_e32 v31, v31 ; 7E3E411F v_mul_f32_e32 v31, v31, v29 ; 0A3E3B1F s_waitcnt vmcnt(3) ; BF8C0F73 v_mac_f32_e32 v17, v31, v32 ; 2C22411F s_waitcnt vmcnt(2) ; BF8C0F72 v_mac_f32_e32 v16, v31, v33 ; 2C20431F s_waitcnt vmcnt(1) ; BF8C0F71 v_mac_f32_e32 v15, v31, v34 ; 2C1E451F s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v14, v31, v35 ; 2C1C471F s_or_b64 exec, exec, s[4:5] ; 87FE047E v_add_i32_e32 v31, vcc, 0x7c, v28 ; 323E38FF 0000007C v_add_i32_e32 v32, vcc, 16, v28 ; 32403890 v_add_i32_e32 v33, vcc, 20, v28 ; 32423894 v_add_i32_e32 v34, vcc, 24, v28 ; 32443898 v_add_i32_e32 v35, vcc, 48, v28 ; 324638B0 v_add_i32_e32 v36, vcc, 52, v28 ; 324838B4 v_add_i32_e32 v28, vcc, 56, v28 ; 323838B8 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0501000 80001C1C buffer_load_dword v31, v31, s[0:3], 0 offen ; E0501000 80001F1F buffer_load_dword v35, v35, s[0:3], 0 offen ; E0501000 80002323 buffer_load_dword v36, v36, s[0:3], 0 offen ; E0501000 80002424 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0501000 80002020 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0501000 80002121 buffer_load_dword v34, v34, s[0:3], 0 offen ; E0501000 80002222 s_waitcnt vmcnt(6) ; BF8C0F76 v_mac_f32_e32 v21, v29, v28 ; 2C2A391D v_mul_f32_e32 v28, v30, v29 ; 0A383B1E s_waitcnt vmcnt(5) ; BF8C0F75 v_cmp_neq_f32_e32 vcc, 0, v31 ; 7C9A3E80 s_waitcnt vmcnt(4) ; BF8C0F74 v_mac_f32_e32 v23, v29, v35 ; 2C2E471D s_waitcnt vmcnt(3) ; BF8C0F73 v_mac_f32_e32 v22, v29, v36 ; 2C2C491D s_waitcnt vmcnt(2) ; BF8C0F72 v_mac_f32_e32 v20, v28, v32 ; 2C28411C s_waitcnt vmcnt(1) ; BF8C0F71 v_mac_f32_e32 v19, v28, v33 ; 2C26431C s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v18, v28, v34 ; 2C24451C v_mov_b32_e32 v28, -1 ; 7E3802C1 s_cbranch_vccnz BB0_9 ; BF870000 v_add_f32_e32 v27, 0x41000000, v27 ; 023636FF 41000000 v_mov_b32_e32 v28, 0 ; 7E380280 v_cmp_ne_u32_e32 vcc, 0, v28 ; 7D9A3880 v_cndmask_b32_e64 v28, 0, 1, vcc ; D100001C 01A90280 v_cmp_ne_u32_e32 vcc, 1, v28 ; 7D9A3881 s_and_b64 vcc, exec, vcc ; 86EA6A7E s_cbranch_vccnz BB0_1 ; BF870000 s_buffer_load_dword s4, s[0:3], 0x130 ; C0220100 00000130 s_buffer_load_dword s5, s[0:3], 0x134 ; C0220140 00000134 s_buffer_load_dword s6, s[0:3], 0x138 ; C0220180 00000138 s_buffer_load_dword s7, s[0:3], 0x150 ; C02201C0 00000150 s_buffer_load_dword s8, s[0:3], 0x154 ; C0220200 00000154 s_buffer_load_dword s9, s[0:3], 0x158 ; C0220240 00000158 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s4 ; 7E100204 v_mov_b32_e32 v9, s5 ; 7E120205 v_mov_b32_e32 v10, s6 ; 7E140206 s_buffer_load_dword s4, s[0:3], 0x160 ; C0220100 00000160 s_buffer_load_dword s5, s[0:3], 0x164 ; C0220140 00000164 s_buffer_load_dword s6, s[0:3], 0x168 ; C0220180 00000168 s_buffer_load_dword s0, s[0:3], 0x16c ; C0220000 0000016C v_mac_f32_e32 v8, s7, v23 ; 2C102E07 v_mac_f32_e32 v9, s8, v22 ; 2C122C08 v_mac_f32_e32 v10, s9, v21 ; 2C142A09 s_and_b32 s1, s11, 1 ; 8601810B v_mac_f32_e64 v8, v20, v4 clamp ; D1168008 00020914 v_mac_f32_e64 v9, v19, v5 clamp ; D1168009 00020B13 v_mac_f32_e64 v10, v18, v6 clamp ; D116800A 00020D12 v_max_f32_e64 v7, v7, v7 clamp ; D10B8007 00020F07 s_cmp_eq_u32 s1, 0 ; BF068001 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v4, v17, s4 clamp ; D1058004 00000911 v_mul_f32_e64 v5, v16, s5 clamp ; D1058005 00000B10 v_mul_f32_e64 v6, v15, s6 clamp ; D1058006 00000D0F v_mul_f32_e64 v11, v14, s0 clamp ; D105800B 0000010E s_cbranch_scc1 BB0_12 ; BF850000 v_max_f32_e64 v8, v8, v8 clamp ; D10B8008 00021108 v_max_f32_e64 v9, v9, v9 clamp ; D10B8009 00021309 v_max_f32_e64 v10, v10, v10 clamp ; D10B800A 0002150A v_max_f32_e64 v7, v7, v7 clamp ; D10B8007 00020F07 v_max_f32_e64 v4, v4, v4 clamp ; D10B8004 00020904 v_max_f32_e64 v5, v5, v5 clamp ; D10B8005 00020B05 v_max_f32_e64 v6, v6, v6 clamp ; D10B8006 00020D06 v_max_f32_e64 v11, v11, v11 clamp ; D10B800B 0002170B exp pos0 v0, v1, v2, v3 done ; C40008CF 03020100 exp param0 v8, v9, v10, v7 ; C400020F 070A0908 exp param1 v4, v5, v6, v11 ; C400021F 0B060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 44 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 1780 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 5 buggy shader: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.floor.f32(float) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.sqrt.f32(float) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #3 define amdgpu_vs void @wrapper([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x float] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32, i32, i32, i32) #4 { main_body: %13 = ptrtoint [0 x float] addrspace(2)* %2 to i64 %14 = bitcast i64 %13 to <2 x i32> %15 = extractelement <2 x i32> %14, i32 0 %16 = extractelement <2 x i32> %14, i32 1 %17 = add i32 %9, %4 %18 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %8, i64 0, i64 0, !amdgpu.uniform !0 %19 = load <4 x i32>, <4 x i32> addrspace(2)* %18, align 16, !invariant.load !0, !alias.scope !1, !noalias !4 %20 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %19, i32 %17, i32 0, i1 false, i1 false) #1 %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = extractelement <4 x float> %20, i32 3 %25 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %8, i64 0, i64 1, !amdgpu.uniform !0 %26 = load <4 x i32>, <4 x i32> addrspace(2)* %25, align 16, !invariant.load !0, !alias.scope !1, !noalias !4 %27 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %26, i32 %17, i32 0, i1 false, i1 false) #1 %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %8, i64 0, i64 2, !amdgpu.uniform !0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !invariant.load !0, !alias.scope !1, !noalias !4 %33 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %32, i32 %17, i32 0, i1 false, i1 false) #1 %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = and i32 %16, 65535 %39 = insertelement <4 x i32> , i32 %15, i32 0 %40 = insertelement <4 x i32> %39, i32 %38, i32 1 %41 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 0) %42 = fmul nsz float %21, %41 %43 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 4) %44 = fmul nsz float %21, %43 %45 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 8) %46 = fmul nsz float %21, %45 %47 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 12) %48 = fmul nsz float %21, %47 %49 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 16) %50 = fmul nsz float %22, %49 %51 = fadd nsz float %50, %42 %52 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 20) %53 = fmul nsz float %22, %52 %54 = fadd nsz float %53, %44 %55 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 24) %56 = fmul nsz float %22, %55 %57 = fadd nsz float %56, %46 %58 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 28) %59 = fmul nsz float %22, %58 %60 = fadd nsz float %59, %48 %61 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 32) %62 = fmul nsz float %23, %61 %63 = fadd nsz float %62, %51 %64 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 36) %65 = fmul nsz float %23, %64 %66 = fadd nsz float %65, %54 %67 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 40) %68 = fmul nsz float %23, %67 %69 = fadd nsz float %68, %57 %70 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 44) %71 = fmul nsz float %23, %70 %72 = fadd nsz float %71, %60 %73 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 48) %74 = fmul nsz float %24, %73 %75 = fadd nsz float %74, %63 %76 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 52) %77 = fmul nsz float %24, %76 %78 = fadd nsz float %77, %66 %79 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 56) %80 = fmul nsz float %24, %79 %81 = fadd nsz float %80, %69 %82 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 60) %83 = fmul nsz float %24, %82 %84 = fadd nsz float %83, %72 %85 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 64) %86 = fmul nsz float %21, %85 %87 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 68) %88 = fmul nsz float %21, %87 %89 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 72) %90 = fmul nsz float %21, %89 %91 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 80) %92 = fmul nsz float %22, %91 %93 = fadd nsz float %92, %86 %94 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 84) %95 = fmul nsz float %22, %94 %96 = fadd nsz float %95, %88 %97 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 88) %98 = fmul nsz float %22, %97 %99 = fadd nsz float %98, %90 %100 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 96) %101 = fmul nsz float %23, %100 %102 = fadd nsz float %101, %93 %103 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 100) %104 = fmul nsz float %23, %103 %105 = fadd nsz float %104, %96 %106 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 104) %107 = fmul nsz float %23, %106 %108 = fadd nsz float %107, %99 %109 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 112) %110 = fmul nsz float %24, %109 %111 = fadd nsz float %110, %102 %112 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 116) %113 = fmul nsz float %24, %112 %114 = fadd nsz float %113, %105 %115 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 120) %116 = fmul nsz float %24, %115 %117 = fadd nsz float %116, %108 %118 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 256) %119 = fmul nsz float %28, %118 %120 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 260) %121 = fmul nsz float %28, %120 %122 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 264) %123 = fmul nsz float %28, %122 %124 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 272) %125 = fmul nsz float %29, %124 %126 = fadd nsz float %125, %119 %127 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 276) %128 = fmul nsz float %29, %127 %129 = fadd nsz float %128, %121 %130 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 280) %131 = fmul nsz float %29, %130 %132 = fadd nsz float %131, %123 %133 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 288) %134 = fmul nsz float %30, %133 %135 = fadd nsz float %134, %126 %136 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 292) %137 = fmul nsz float %30, %136 %138 = fadd nsz float %137, %129 %139 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 296) %140 = fmul nsz float %30, %139 %141 = fadd nsz float %140, %132 br label %loop15.i loop15.i: ; preds = %endif66.i, %main_body %TEMP6.w.0.i = phi float [ 3.200000e+01, %main_body ], [ %261, %endif66.i ] %TEMP7.x.0.i = phi float [ 0.000000e+00, %main_body ], [ %237, %endif66.i ] %TEMP7.y.0.i = phi float [ 0.000000e+00, %main_body ], [ %241, %endif66.i ] %TEMP7.z.0.i = phi float [ 0.000000e+00, %main_body ], [ %245, %endif66.i ] %TEMP8.x.0.i = phi float [ 0.000000e+00, %main_body ], [ %249, %endif66.i ] %TEMP8.y.0.i = phi float [ 0.000000e+00, %main_body ], [ %253, %endif66.i ] %TEMP8.z.0.i = phi float [ 0.000000e+00, %main_body ], [ %257, %endif66.i ] %142 = call nsz float @llvm.floor.f32(float %TEMP6.w.0.i) #1 %143 = fptosi float %142 to i32 %144 = shl i32 %143, 4 %145 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %144) %146 = fcmp nsz une float %145, 3.000000e+00 %147 = add i32 %144, 80 %148 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %147) %149 = fsub nsz float -0.000000e+00, %148 %150 = add i32 %144, 84 %151 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %150) %152 = fsub nsz float -0.000000e+00, %151 %153 = add i32 %144, 88 %154 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %153) %155 = fsub nsz float -0.000000e+00, %154 br i1 %146, label %if20.i, label %endif30.i if20.i: ; preds = %loop15.i %156 = add i32 %144, 64 %157 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %156) %158 = fsub nsz float %157, %111 %159 = add i32 %144, 68 %160 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %159) %161 = fsub nsz float %160, %114 %162 = add i32 %144, 72 %163 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %162) %164 = fsub nsz float %163, %117 %165 = fmul nsz float %158, %158 %166 = fmul nsz float %161, %161 %167 = fadd nsz float %166, %165 %168 = fmul nsz float %164, %164 %169 = fadd nsz float %167, %168 %170 = call nsz float @llvm.sqrt.f32(float %169) #1 %171 = fdiv nsz float 1.000000e+00, %170, !fpmath !6 %172 = fmul nsz float %169, %171 %173 = or i32 %144, 12 %174 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %173) %175 = or i32 %144, 8 %176 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %175) %177 = fmul nsz float %172, %174 %178 = fadd nsz float %177, %176 %179 = or i32 %144, 4 %180 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %179) %181 = fmul nsz float %172, %178 %182 = fadd nsz float %181, %180 %183 = fdiv nsz float 1.000000e+00, %182, !fpmath !6 %184 = add i32 %144, 76 %185 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %184) %186 = fcmp nsz olt float %172, %185 %187 = select i1 %186, float 1.000000e+00, float 0.000000e+00 %188 = fmul nsz float %183, %187 br label %endif30.i endif30.i: ; preds = %if20.i, %loop15.i %TEMP3.w.0.i = phi float [ %188, %if20.i ], [ 1.000000e+00, %loop15.i ] %TEMP4.x.0.i = phi float [ %158, %if20.i ], [ %149, %loop15.i ] %TEMP4.y.0.i = phi float [ %161, %if20.i ], [ %152, %loop15.i ] %TEMP4.z.0.i = phi float [ %164, %if20.i ], [ %155, %loop15.i ] %189 = fmul nsz float %TEMP4.x.0.i, %TEMP4.x.0.i %190 = fmul nsz float %TEMP4.y.0.i, %TEMP4.y.0.i %191 = fadd nsz float %190, %189 %192 = fmul nsz float %TEMP4.z.0.i, %TEMP4.z.0.i %193 = fadd nsz float %191, %192 %194 = call nsz float @llvm.sqrt.f32(float %193) #1 %195 = fdiv nsz float 1.000000e+00, %194, !fpmath !6 %196 = fmul nsz float %TEMP4.x.0.i, %195 %197 = fmul nsz float %TEMP4.y.0.i, %195 %198 = fmul nsz float %TEMP4.z.0.i, %195 %199 = fcmp nsz oeq float %145, 2.000000e+00 br i1 %199, label %if35.i, label %endif44.i if35.i: ; preds = %endif30.i %200 = fmul nsz float %196, %148 %201 = fsub nsz float -0.000000e+00, %200 %202 = fmul nsz float %197, %151 %203 = fsub nsz float %201, %202 %204 = fmul nsz float %198, %154 %205 = fsub nsz float %203, %204 %206 = add i32 %144, 100 %207 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %206) %208 = fsub nsz float %205, %207 %209 = add i32 %144, 104 %210 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %209) %211 = fmul nsz float %208, %210 %212 = add i32 %144, 92 %213 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %212) %214 = call nsz float @llvm.pow.f32(float %211, float %213) #1 %215 = add i32 %144, 96 %216 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %215) %217 = fcmp nsz oge float %205, %216 %218 = select i1 %217, float 1.000000e+00, float 0.000000e+00 %219 = fcmp nsz oge float %205, %207 %220 = select i1 %219, float 1.000000e+00, float 0.000000e+00 %221 = fmul nsz float %214, %220 %222 = fadd nsz float %221, %218 %223 = call nsz float @llvm.maxnum.f32(float %222, float 0.000000e+00) #1 %224 = call nsz float @llvm.minnum.f32(float %223, float 1.000000e+00) #1 %225 = fmul nsz float %TEMP3.w.0.i, %224 br label %endif44.i endif44.i: ; preds = %if35.i, %endif30.i %TEMP3.w.1.i = phi float [ %225, %if35.i ], [ %TEMP3.w.0.i, %endif30.i ] %226 = fmul nsz float %135, %196 %227 = fmul nsz float %138, %197 %228 = fadd nsz float %227, %226 %229 = fmul nsz float %141, %198 %230 = fadd nsz float %228, %229 %231 = call nsz float @llvm.maxnum.f32(float %230, float 0.000000e+00) #1 %232 = call nsz float @llvm.minnum.f32(float %231, float 1.000000e+00) #1 %233 = fmul nsz float %TEMP3.w.1.i, %232 %234 = add i32 %144, 16 %235 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %234) %236 = fmul nsz float %235, %233 %237 = fadd nsz float %236, %TEMP7.x.0.i %238 = add i32 %144, 20 %239 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %238) %240 = fmul nsz float %239, %233 %241 = fadd nsz float %240, %TEMP7.y.0.i %242 = add i32 %144, 24 %243 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %242) %244 = fmul nsz float %243, %233 %245 = fadd nsz float %244, %TEMP7.z.0.i %246 = add i32 %144, 48 %247 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %246) %248 = fmul nsz float %247, %TEMP3.w.1.i %249 = fadd nsz float %248, %TEMP8.x.0.i %250 = add i32 %144, 52 %251 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %250) %252 = fmul nsz float %251, %TEMP3.w.1.i %253 = fadd nsz float %252, %TEMP8.y.0.i %254 = add i32 %144, 56 %255 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %254) %256 = fmul nsz float %255, %TEMP3.w.1.i %257 = fadd nsz float %256, %TEMP8.z.0.i %258 = add i32 %144, 124 %259 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 %258) %260 = fcmp nsz une float %259, 0.000000e+00 br i1 %260, label %endloop68.i, label %endif66.i endif66.i: ; preds = %endif44.i %261 = fadd nsz float %TEMP6.w.0.i, 8.000000e+00 br label %loop15.i endloop68.i: ; preds = %endif44.i %262 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 336) %263 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 304) %264 = fmul nsz float %249, %262 %265 = fadd nsz float %264, %263 %266 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 340) %267 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 308) %268 = fmul nsz float %253, %266 %269 = fadd nsz float %268, %267 %270 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 344) %271 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %40, i32 312) %272 = fmul nsz float %257, %270 %273 = fadd nsz float %272, %271 %274 = fmul nsz float %237, %34 %275 = fadd nsz float %274, %265 %276 = fmul nsz float %241, %35 %277 = fadd nsz float %276, %269 %278 = fmul nsz float %245, %36 %279 = fadd nsz float %278, %273 %280 = call nsz float @llvm.maxnum.f32(float %275, float 0.000000e+00) #1 %281 = call nsz float @llvm.minnum.f32(float %280, float 1.000000e+00) #1 %282 = call nsz float @llvm.maxnum.f32(float %277, float 0.000000e+00) #1 %283 = call nsz float @llvm.minnum.f32(float %282, float 1.000000e+00) #1 %284 = call nsz float @llvm.maxnum.f32(float %279, float 0.000000e+00) #1 %285 = call nsz float @llvm.minnum.f32(float %284, float 1.000000e+00) #1 %286 = call nsz float @llvm.maxnum.f32(float %37, float 0.000000e+00) #1 %287 = call nsz float @llvm.minnum.f32(float %286, float 1.000000e+00) #1 %288 = and i32 %7, 1 %289 = icmp eq i32 %288, 0 br i1 %289, label %main.exit, label %if-true-block.i if-true-block.i: ; preds = %endloop68.i %290 = call nsz float @llvm.maxnum.f32(float %281, float 0.000000e+00) #1 %291 = call nsz float @llvm.minnum.f32(float %290, float 1.000000e+00) #1 %292 = call nsz float @llvm.maxnum.f32(float %283, float 0.000000e+00) #1 %293 = call nsz float @llvm.minnum.f32(float %292, float 1.000000e+00) #1 %294 = call nsz float @llvm.maxnum.f32(float %285, float 0.000000e+00) #1 %295 = call nsz float @llvm.minnum.f32(float %294, float 1.000000e+00) #1 %296 = call nsz float @llvm.maxnum.f32(float %287, float 0.000000e+00) #1 %297 = call nsz float @llvm.minnum.f32(float %296, float 1.000000e+00) #1 br label %main.exit main.exit: ; preds = %endloop68.i, %if-true-block.i %OUT1.w.0.i = phi float [ %297, %if-true-block.i ], [ %287, %endloop68.i ] %OUT1.z.0.i = phi float [ %295, %if-true-block.i ], [ %285, %endloop68.i ] %OUT1.y.0.i = phi float [ %293, %if-true-block.i ], [ %283, %endloop68.i ] %OUT1.x.0.i = phi float [ %291, %if-true-block.i ], [ %281, %endloop68.i ] call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %75, float %78, float %81, float %84, i1 true, i1 false) #3, !noalias !1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %OUT1.x.0.i, float %OUT1.y.0.i, float %OUT1.z.0.i, float %OUT1.w.0.i, i1 false, i1 false) #3, !noalias !1 ret void } attributes #0 = { nounwind readonly } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone speculatable } attributes #3 = { nounwind } attributes #4 = { "no-signed-zeros-fp-math"="true" } !0 = !{} !1 = !{!2} !2 = distinct !{!2, !3, !"main: argument 1"} !3 = distinct !{!3, !"main"} !4 = !{!5} !5 = distinct !{!5, !3, !"main: argument 0"} !6 = !{float 2.500000e+00} Vertex Shader as VS: Shader main disassembly: s_load_dwordx4 s[0:3], s[12:13], 0x0 ; C00A0006 00000000 v_add_i32_e32 v0, vcc, s8, v0 ; 32000008 s_and_b32 s5, s5, 0xffff ; 8605FF05 0000FFFF s_mov_b32 s7, 0x27fac ; BE8700FF 00027FAC s_movk_i32 s6, 0x190 ; B0060190 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[20:23], v0, s[0:3], 0 idxen ; E00C2000 80001400 s_load_dwordx4 s[0:3], s[12:13], 0x10 ; C00A0006 00000010 s_buffer_load_dword s8, s[4:7], 0x40 ; C0220202 00000040 s_buffer_load_dword s9, s[4:7], 0x44 ; C0220242 00000044 s_buffer_load_dword s10, s[4:7], 0x48 ; C0220282 00000048 s_buffer_load_dword s14, s[4:7], 0x128 ; C0220382 00000128 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[24:27], v0, s[0:3], 0 idxen ; E00C2000 80001800 s_load_dwordx4 s[0:3], s[12:13], 0x20 ; C00A0006 00000020 s_buffer_load_dword s12, s[4:7], 0x120 ; C0220302 00000120 s_buffer_load_dword s13, s[4:7], 0x124 ; C0220342 00000124 v_mov_b32_e32 v14, 0 ; 7E1C0280 v_mov_b32_e32 v16, 0 ; 7E200280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[4:7], v0, s[0:3], 0 idxen ; E00C2000 80000400 s_buffer_load_dword s0, s[4:7], 0x0 ; C0220002 00000000 s_buffer_load_dword s1, s[4:7], 0x4 ; C0220042 00000004 s_buffer_load_dword s2, s[4:7], 0x8 ; C0220082 00000008 s_buffer_load_dword s3, s[4:7], 0xc ; C02200C2 0000000C v_mov_b32_e32 v15, 0 ; 7E1E0280 v_mov_b32_e32 v19, 0 ; 7E260280 v_mov_b32_e32 v18, 0 ; 7E240280 v_mov_b32_e32 v17, 0 ; 7E220280 s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v0, s0, v20 ; 0A002800 v_mul_f32_e32 v1, s1, v20 ; 0A022801 v_mul_f32_e32 v2, s2, v20 ; 0A042802 s_buffer_load_dword s0, s[4:7], 0x10 ; C0220002 00000010 s_buffer_load_dword s1, s[4:7], 0x14 ; C0220042 00000014 s_buffer_load_dword s2, s[4:7], 0x18 ; C0220082 00000018 v_mul_f32_e32 v3, s3, v20 ; 0A062803 v_mul_f32_e32 v8, s8, v20 ; 0A102808 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s0, v21 ; 2C002A00 v_mac_f32_e32 v1, s1, v21 ; 2C022A01 v_mac_f32_e32 v2, s2, v21 ; 2C042A02 s_buffer_load_dword s0, s[4:7], 0x20 ; C0220002 00000020 s_buffer_load_dword s1, s[4:7], 0x24 ; C0220042 00000024 s_buffer_load_dword s2, s[4:7], 0x28 ; C0220082 00000028 v_mul_f32_e32 v9, s9, v20 ; 0A122809 v_mul_f32_e32 v10, s10, v20 ; 0A14280A s_buffer_load_dword s3, s[4:7], 0x1c ; C02200C2 0000001C s_buffer_load_dword s8, s[4:7], 0x50 ; C0220202 00000050 s_buffer_load_dword s9, s[4:7], 0x54 ; C0220242 00000054 s_buffer_load_dword s10, s[4:7], 0x58 ; C0220282 00000058 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s0, v22 ; 2C002C00 v_mac_f32_e32 v1, s1, v22 ; 2C022C01 v_mac_f32_e32 v2, s2, v22 ; 2C042C02 s_buffer_load_dword s0, s[4:7], 0x100 ; C0220002 00000100 s_buffer_load_dword s1, s[4:7], 0x104 ; C0220042 00000104 s_buffer_load_dword s2, s[4:7], 0x108 ; C0220082 00000108 v_mac_f32_e32 v3, s3, v21 ; 2C062A03 v_mac_f32_e32 v8, s8, v21 ; 2C102A08 v_mac_f32_e32 v9, s9, v21 ; 2C122A09 v_mac_f32_e32 v10, s10, v21 ; 2C142A0A s_buffer_load_dword s3, s[4:7], 0x2c ; C02200C2 0000002C s_buffer_load_dword s8, s[4:7], 0x60 ; C0220202 00000060 s_buffer_load_dword s9, s[4:7], 0x64 ; C0220242 00000064 s_buffer_load_dword s10, s[4:7], 0x68 ; C0220282 00000068 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v11, s0, v24 ; 0A163000 v_mul_f32_e32 v12, s1, v24 ; 0A183001 v_mul_f32_e32 v13, s2, v24 ; 0A1A3002 s_buffer_load_dword s0, s[4:7], 0x110 ; C0220002 00000110 s_buffer_load_dword s1, s[4:7], 0x114 ; C0220042 00000114 s_buffer_load_dword s2, s[4:7], 0x118 ; C0220082 00000118 v_mac_f32_e32 v3, s3, v22 ; 2C062C03 v_mac_f32_e32 v8, s8, v22 ; 2C102C08 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v11, s0, v25 ; 2C163200 v_mac_f32_e32 v12, s1, v25 ; 2C183201 v_mac_f32_e32 v13, s2, v25 ; 2C1A3202 v_mac_f32_e32 v9, s9, v22 ; 2C122C09 v_mac_f32_e32 v10, s10, v22 ; 2C142C0A s_buffer_load_dword s0, s[4:7], 0x30 ; C0220002 00000030 s_buffer_load_dword s1, s[4:7], 0x34 ; C0220042 00000034 s_buffer_load_dword s2, s[4:7], 0x38 ; C0220082 00000038 s_buffer_load_dword s3, s[4:7], 0x3c ; C02200C2 0000003C s_buffer_load_dword s8, s[4:7], 0x70 ; C0220202 00000070 s_buffer_load_dword s9, s[4:7], 0x74 ; C0220242 00000074 s_buffer_load_dword s10, s[4:7], 0x78 ; C0220282 00000078 v_mov_b32_e32 v20, 0x42000000 ; 7E2802FF 42000000 v_mov_b32_e32 v21, 0x40400000 ; 7E2A02FF 40400000 v_bfrev_b32_e32 v22, 1 ; 7E2C5881 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s0, v23 ; 2C002E00 v_mac_f32_e32 v1, s1, v23 ; 2C022E01 v_mac_f32_e32 v2, s2, v23 ; 2C042E02 v_mac_f32_e32 v3, s3, v23 ; 2C062E03 v_mac_f32_e32 v8, s8, v23 ; 2C102E08 v_mac_f32_e32 v9, s9, v23 ; 2C122E09 v_mac_f32_e32 v10, s10, v23 ; 2C142E0A v_mac_f32_e32 v11, s12, v26 ; 2C16340C v_mac_f32_e32 v12, s13, v26 ; 2C18340D v_mac_f32_e32 v13, s14, v26 ; 2C1A340E s_branch BB0_2 ; BF820000 v_add_f32_e32 v20, 0x41000000, v20 ; 022828FF 41000000 v_floor_f32_e32 v23, v20 ; 7E2E3F14 v_cvt_i32_f32_e32 v23, v23 ; 7E2E1117 v_lshlrev_b32_e32 v23, 4, v23 ; 242E2E84 v_add_i32_e32 v24, vcc, 0x58, v23 ; 32302EFF 00000058 buffer_load_dword v25, v24, s[4:7], 0 offen ; E0501000 80011918 v_add_i32_e32 v24, vcc, 0x54, v23 ; 32302EFF 00000054 buffer_load_dword v26, v24, s[4:7], 0 offen ; E0501000 80011A18 v_add_i32_e32 v24, vcc, 0x50, v23 ; 32302EFF 00000050 buffer_load_dword v28, v23, s[4:7], 0 offen ; E0501000 80011C17 buffer_load_dword v27, v24, s[4:7], 0 offen ; E0501000 80011B18 s_waitcnt vmcnt(1) ; BF8C0F71 v_cmp_eq_f32_e32 vcc, v28, v21 ; 7C842B1C s_cbranch_vccnz BB0_4 ; BF870000 v_add_i32_e32 v24, vcc, 64, v23 ; 32302EC0 v_add_i32_e32 v29, vcc, 0x44, v23 ; 323A2EFF 00000044 buffer_load_dword v24, v24, s[4:7], 0 offen ; E0501000 80011818 buffer_load_dword v30, v29, s[4:7], 0 offen ; E0501000 80011E1D v_add_i32_e32 v29, vcc, 0x48, v23 ; 323A2EFF 00000048 buffer_load_dword v31, v29, s[4:7], 0 offen ; E0501000 80011F1D v_or_b32_e32 v29, 12, v23 ; 283A2E8C buffer_load_dword v32, v29, s[4:7], 0 offen ; E0501000 8001201D v_or_b32_e32 v29, 8, v23 ; 283A2E88 buffer_load_dword v33, v29, s[4:7], 0 offen ; E0501000 8001211D v_or_b32_e32 v29, 4, v23 ; 283A2E84 buffer_load_dword v34, v29, s[4:7], 0 offen ; E0501000 8001221D v_add_i32_e32 v29, vcc, 0x4c, v23 ; 323A2EFF 0000004C buffer_load_dword v35, v29, s[4:7], 0 offen ; E0501000 8001231D s_waitcnt vmcnt(6) ; BF8C0F76 v_subrev_f32_e32 v29, v8, v24 ; 063A3108 v_mul_f32_e32 v24, v29, v29 ; 0A303B1D s_waitcnt vmcnt(5) ; BF8C0F75 v_subrev_f32_e32 v30, v9, v30 ; 063C3D09 v_mac_f32_e32 v24, v30, v30 ; 2C303D1E s_waitcnt vmcnt(4) ; BF8C0F74 v_subrev_f32_e32 v31, v10, v31 ; 063E3F0A v_mac_f32_e32 v24, v31, v31 ; 2C303F1F v_rsq_f32_e32 v36, v24 ; 7E484918 v_mul_f32_e32 v24, v36, v24 ; 0A303124 s_waitcnt vmcnt(2) ; BF8C0F72 v_mac_f32_e32 v33, v32, v24 ; 2C423120 s_waitcnt vmcnt(1) ; BF8C0F71 v_mac_f32_e32 v34, v33, v24 ; 2C443121 v_rcp_f32_e32 v32, v34 ; 7E404522 s_waitcnt vmcnt(0) ; BF8C0F70 v_cmp_lt_f32_e32 vcc, v24, v35 ; 7C824718 v_cndmask_b32_e64 v24, 0, 1.0, vcc ; D1000018 01A9E480 v_mul_f32_e32 v24, v24, v32 ; 0A304118 s_branch BB0_5 ; BF820000 s_waitcnt vmcnt(0) ; BF8C0F70 v_xor_b32_e32 v29, v27, v22 ; 2A3A2D1B v_xor_b32_e32 v30, v26, v22 ; 2A3C2D1A v_xor_b32_e32 v31, v25, v22 ; 2A3E2D19 v_mov_b32_e32 v24, 1.0 ; 7E3002F2 v_mul_f32_e32 v32, v29, v29 ; 0A403B1D v_mac_f32_e32 v32, v30, v30 ; 2C403D1E v_mac_f32_e32 v32, v31, v31 ; 2C403F1F v_rsq_f32_e32 v32, v32 ; 7E404920 v_cmp_neq_f32_e32 vcc, 2.0, v28 ; 7C9A38F4 v_mul_f32_e32 v28, v32, v31 ; 0A383F20 v_mul_f32_e32 v31, v32, v29 ; 0A3E3B20 v_mul_f32_e32 v29, v32, v30 ; 0A3A3D20 s_cbranch_vccnz BB0_7 ; BF870000 v_add_i32_e32 v30, vcc, 0x64, v23 ; 323C2EFF 00000064 v_add_i32_e32 v32, vcc, 0x68, v23 ; 32402EFF 00000068 buffer_load_dword v30, v30, s[4:7], 0 offen ; E0501000 80011E1E buffer_load_dword v32, v32, s[4:7], 0 offen ; E0501000 80012020 v_add_i32_e32 v33, vcc, 0x5c, v23 ; 32422EFF 0000005C v_add_i32_e32 v34, vcc, 0x60, v23 ; 32442EFF 00000060 buffer_load_dword v33, v33, s[4:7], 0 offen ; E0501000 80012121 buffer_load_dword v34, v34, s[4:7], 0 offen ; E0501000 80012222 v_mul_f32_e32 v26, v26, v29 ; 0A343B1A v_mad_f32 v26, v31, -v27, -v26 ; D1C1001A C46A371F v_mad_f32 v25, -v28, v25, v26 ; D1C10019 246A331C s_waitcnt vmcnt(3) ; BF8C0F73 v_subrev_f32_e32 v26, v30, v25 ; 0634331E s_waitcnt vmcnt(2) ; BF8C0F72 v_mul_f32_e32 v26, v32, v26 ; 0A343520 v_log_f32_e32 v26, v26 ; 7E34431A v_cmp_ge_f32_e32 vcc, v25, v30 ; 7C8C3D19 v_cndmask_b32_e64 v27, 0, 1.0, vcc ; D100001B 01A9E480 s_waitcnt vmcnt(0) ; BF8C0F70 v_cmp_ge_f32_e32 vcc, v25, v34 ; 7C8C4519 v_mul_legacy_f32_e32 v26, v33, v26 ; 08343521 v_exp_f32_e32 v26, v26 ; 7E34411A v_cndmask_b32_e64 v25, 0, 1.0, vcc ; D1000019 01A9E480 v_mac_f32_e64 v25, v26, v27 clamp ; D1168019 0002371A v_mul_f32_e32 v24, v25, v24 ; 0A303119 v_mul_f32_e32 v25, v31, v11 ; 0A32171F v_mac_f32_e32 v25, v29, v12 ; 2C32191D v_mac_f32_e64 v25, v13, v28 clamp ; D1168019 0002390D v_add_i32_e32 v26, vcc, 0x7c, v23 ; 32342EFF 0000007C v_add_i32_e32 v27, vcc, 16, v23 ; 32362E90 v_add_i32_e32 v28, vcc, 20, v23 ; 32382E94 v_add_i32_e32 v29, vcc, 24, v23 ; 323A2E98 v_add_i32_e32 v30, vcc, 48, v23 ; 323C2EB0 v_add_i32_e32 v31, vcc, 52, v23 ; 323E2EB4 v_add_i32_e32 v23, vcc, 56, v23 ; 322E2EB8 buffer_load_dword v26, v26, s[4:7], 0 offen ; E0501000 80011A1A buffer_load_dword v30, v30, s[4:7], 0 offen ; E0501000 80011E1E buffer_load_dword v31, v31, s[4:7], 0 offen ; E0501000 80011F1F buffer_load_dword v23, v23, s[4:7], 0 offen ; E0501000 80011717 buffer_load_dword v27, v27, s[4:7], 0 offen ; E0501000 80011B1B buffer_load_dword v28, v28, s[4:7], 0 offen ; E0501000 80011C1C buffer_load_dword v29, v29, s[4:7], 0 offen ; E0501000 80011D1D v_mul_f32_e32 v25, v25, v24 ; 0A323119 s_waitcnt vmcnt(6) ; BF8C0F76 v_cmp_neq_f32_e32 vcc, 0, v26 ; 7C9A3480 s_waitcnt vmcnt(5) ; BF8C0F75 v_mac_f32_e32 v19, v24, v30 ; 2C263D18 s_waitcnt vmcnt(4) ; BF8C0F74 v_mac_f32_e32 v18, v24, v31 ; 2C243F18 s_waitcnt vmcnt(3) ; BF8C0F73 v_mac_f32_e32 v17, v24, v23 ; 2C222F18 s_waitcnt vmcnt(2) ; BF8C0F72 v_mac_f32_e32 v14, v25, v27 ; 2C1C3719 s_waitcnt vmcnt(1) ; BF8C0F71 v_mac_f32_e32 v16, v25, v28 ; 2C203919 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v15, v25, v29 ; 2C1E3B19 s_cbranch_vccz BB0_1 ; BF860000 s_buffer_load_dword s0, s[4:7], 0x130 ; C0220002 00000130 s_buffer_load_dword s1, s[4:7], 0x134 ; C0220042 00000134 s_buffer_load_dword s2, s[4:7], 0x138 ; C0220082 00000138 s_buffer_load_dword s3, s[4:7], 0x150 ; C02200C2 00000150 s_buffer_load_dword s8, s[4:7], 0x154 ; C0220202 00000154 s_buffer_load_dword s4, s[4:7], 0x158 ; C0220102 00000158 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s0 ; 7E100200 v_mov_b32_e32 v9, s1 ; 7E120201 v_mov_b32_e32 v10, s2 ; 7E140202 v_mac_f32_e32 v8, s3, v19 ; 2C102603 v_mac_f32_e32 v9, s8, v18 ; 2C122408 v_mac_f32_e32 v10, s4, v17 ; 2C142204 s_and_b32 s0, s11, 1 ; 8600810B v_mac_f32_e64 v8, v14, v4 clamp ; D1168008 0002090E v_mac_f32_e64 v9, v16, v5 clamp ; D1168009 00020B10 v_mac_f32_e64 v10, v15, v6 clamp ; D116800A 00020D0F v_max_f32_e64 v4, v7, v7 clamp ; D10B8004 00020F07 s_cmp_eq_u32 s0, 0 ; BF068000 s_cbranch_scc1 BB0_10 ; BF850000 v_max_f32_e64 v8, v8, v8 clamp ; D10B8008 00021108 v_max_f32_e64 v9, v9, v9 clamp ; D10B8009 00021309 v_max_f32_e64 v10, v10, v10 clamp ; D10B800A 0002150A v_max_f32_e64 v4, v4, v4 clamp ; D10B8004 00020904 exp pos0 v0, v1, v2, v3 done ; C40008CF 03020100 exp param0 v8, v9, v10, v4 ; C400020F 040A0908 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 40 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 1420 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 6