%93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = extractelement <4 x float> %98, i32 2 %102 = extractelement <4 x float> %98, i32 3 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %5, %7 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 2 %110 = extractelement <4 x float> %106, i32 3 %111 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0 %113 = add i32 %5, %7 %114 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %112, i32 0, i32 %113) %115 = extractelement <4 x float> %114, i32 0 %116 = extractelement <4 x float> %114, i32 1 %117 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %118 = load <16 x i8>, <16 x i8> addrspace(2)* %117, align 16, !tbaa !0 %119 = add i32 %5, %7 %120 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %118, i32 0, i32 %119) %121 = extractelement <4 x float> %120, i32 0 %122 = extractelement <4 x float> %120, i32 1 %123 = fmul float %121, %76 %124 = fadd float %123, %79 %125 = fmul float %122, %77 %126 = fadd float %125, %78 %127 = fmul float %40, %85 %128 = fmul float %41, %85 %129 = fmul float %42, %85 %130 = fmul float %43, %85 %131 = fmul float %36, %84 %132 = fadd float %131, %127 %133 = fmul float %37, %84 %134 = fadd float %133, %128 %135 = fmul float %38, %84 %136 = fadd float %135, %129 %137 = fmul float %39, %84 %138 = fadd float %137, %130 %139 = fmul float %44, %86 %140 = fadd float %139, %132 %141 = fmul float %45, %86 %142 = fadd float %141, %134 %143 = fmul float %46, %86 %144 = fadd float %143, %136 %145 = fmul float %47, %86 %146 = fadd float %145, %138 %147 = fmul float %48, %87 %148 = fadd float %147, %140 %149 = fmul float %49, %87 %150 = fadd float %149, %142 %151 = fmul float %50, %87 %152 = fadd float %151, %144 %153 = fmul float %51, %87 %154 = fadd float %153, %146 %155 = fsub float %148, %33 %156 = fsub float %150, %34 %157 = fsub float %152, %35 %158 = fmul float %69, %156 %159 = fadd float %158, %70 %160 = fadd float %159, 5.000000e-01 %161 = call float @llvm.AMDIL.fraction.(float %160) %162 = fmul float %161, 0x401921FB60000000 %163 = fadd float %162, 0xC00921FB60000000 %164 = call float @llvm.sin.f32(float %163) %165 = fmul float %164, %71 %166 = fmul float %72, %156 %167 = fadd float %166, %73 %168 = fadd float %167, 5.000000e-01 %169 = call float @llvm.AMDIL.fraction.(float %168) %170 = fmul float %169, 0x401921FB60000000 %171 = fadd float %170, 0xC00921FB60000000 %172 = call float @llvm.sin.f32(float %171) %173 = fmul float %172, %74 %174 = fmul float %165, %173 %175 = fmul float %174, 0x3FC45F3060000000 %176 = fadd float %175, 5.000000e-01 %177 = call float @llvm.AMDIL.fraction.(float %176) %178 = fmul float %177, 0x401921FB60000000 %179 = fadd float %178, 0xC00921FB60000000 %180 = call float @llvm.cos.f32(float %179) %181 = call float @llvm.sin.f32(float %179) %182 = fadd float %61, %64 %183 = fadd float %62, %65 %184 = fadd float %63, %66 %185 = fsub float %155, %182 %186 = fsub float %156, %183 %187 = fsub float %157, %184 %188 = fmul float %67, %185 %189 = fmul float %68, %186 %190 = fadd float %189, %188 %191 = fmul float %75, %187 %192 = fadd float %190, %191 %193 = fmul float %67, %192 %194 = fadd float %193, %182 %195 = fmul float %68, %192 %196 = fadd float %195, %183 %197 = fmul float %75, %192 %198 = fadd float %197, %184 %199 = fsub float %155, %194 %200 = fsub float %156, %196 %201 = fsub float %157, %198 %202 = fmul float %200, %75 %203 = fmul float %201, %67 %204 = fmul float %199, %68 %205 = fmul float %68, %201 %206 = fsub float %205, %202 %207 = fmul float %75, %199 %208 = fsub float %207, %203 %209 = fmul float %67, %200 %210 = fsub float %209, %204 %211 = fmul float %181, %206 %212 = fmul float %181, %208 %213 = fmul float %181, %210 %214 = fmul float %199, %180 %215 = fadd float %214, %211 %216 = fmul float %200, %180 %217 = fadd float %216, %212 %218 = fmul float %201, %180 %219 = fadd float %218, %213 %220 = fadd float %215, %194 %221 = fadd float %217, %196 %222 = fadd float %219, %198 %223 = fsub float %220, %155 %224 = fsub float %221, %156 %225 = fsub float %222, %157 %226 = fmul float %107, %223 %227 = fadd float %226, %148 %228 = fmul float %108, %224 %229 = fadd float %228, %150 %230 = fmul float %109, %225 %231 = fadd float %230, %152 %232 = fmul float %32, %227 %233 = fsub float %29, %232 %234 = fmul float %32, %229 %235 = fsub float %30, %234 %236 = fmul float %32, %231 %237 = fsub float %31, %236 %238 = fmul float %235, %55 %239 = fmul float %235, %56 %240 = fmul float %235, %57 %241 = fmul float %52, %233 %242 = fadd float %241, %238 %243 = fmul float %53, %233 %244 = fadd float %243, %239 %245 = fmul float %54, %233 %246 = fadd float %245, %240 %247 = fmul float %58, %237 %248 = fadd float %247, %242 %249 = fmul float %59, %237 %250 = fadd float %249, %244 %251 = fmul float %60, %237 %252 = fadd float %251, %246 %253 = fmul float %93, 0x3F80101020000000 %254 = fadd float %253, -1.000000e+00 %255 = fmul float %94, 0x3F80101020000000 %256 = fadd float %255, -1.000000e+00 %257 = fmul float %92, 0x3F80101020000000 %258 = fadd float %257, -1.000000e+00 %259 = fmul float %99, 0x3F80101020000000 %260 = fadd float %259, -1.000000e+00 %261 = fmul float %100, 0x3F80101020000000 %262 = fadd float %261, -1.000000e+00 %263 = fmul float %101, 0x3F80101020000000 %264 = fadd float %263, -1.000000e+00 %265 = fmul float %102, 0x3F80101020000000 %266 = fadd float %265, -1.000000e+00 %267 = fmul float %254, %264 %268 = fmul float %256, %260 %269 = fmul float %258, %262 %270 = fmul float %262, %256 %271 = fsub float %270, %267 %272 = fmul float %264, %258 %273 = fsub float %272, %268 %274 = fmul float %260, %254 %275 = fsub float %274, %269 %276 = fmul float %266, %271 %277 = fmul float %266, %273 %278 = fmul float %266, %275 %279 = fmul float %262, %278 %280 = fmul float %264, %276 %281 = fmul float %260, %277 %282 = fmul float %277, %264 %283 = fsub float %282, %279 %284 = fmul float %278, %260 %285 = fsub float %284, %280 %286 = fmul float %276, %262 %287 = fsub float %286, %281 %288 = fmul float %266, %283 %289 = fmul float %266, %285 %290 = fmul float %266, %287 %291 = fmul float %288, %248 %292 = fmul float %289, %250 %293 = fadd float %292, %291 %294 = fmul float %290, %252 %295 = fadd float %293, %294 %296 = fmul float %288, %58 %297 = fmul float %289, %59 %298 = fadd float %297, %296 %299 = fmul float %290, %60 %300 = fadd float %298, %299 %301 = fmul float %276, %248 %302 = fmul float %277, %250 %303 = fadd float %302, %301 %304 = fmul float %278, %252 %305 = fadd float %303, %304 %306 = fmul float %276, %58 %307 = fmul float %277, %59 %308 = fadd float %307, %306 %309 = fmul float %278, %60 %310 = fadd float %308, %309 %311 = fmul float %260, %248 %312 = fmul float %262, %250 %313 = fadd float %312, %311 %314 = fmul float %264, %252 %315 = fadd float %313, %314 %316 = fmul float %260, %58 %317 = fmul float %262, %59 %318 = fadd float %317, %316 %319 = fmul float %264, %60 %320 = fadd float %318, %319 %321 = fmul float %115, 0.000000e+00 %322 = fmul float %115, 0.000000e+00 %323 = fmul float %229, %17 %324 = fmul float %229, %18 %325 = fmul float %229, %19 %326 = fmul float %229, %20 %327 = fmul float %13, %227 %328 = fadd float %327, %323 %329 = fmul float %14, %227 %330 = fadd float %329, %324 %331 = fmul float %15, %227 %332 = fadd float %331, %325 %333 = fmul float %16, %227 %334 = fadd float %333, %326 %335 = fmul float %21, %231 %336 = fadd float %335, %328 %337 = fmul float %22, %231 %338 = fadd float %337, %330 %339 = fmul float %23, %231 %340 = fadd float %339, %332 %341 = fmul float %24, %231 %342 = fadd float %341, %334 %343 = fmul float %25, %154 %344 = fadd float %343, %336 %345 = fmul float %26, %154 %346 = fadd float %345, %338 %347 = fmul float %27, %154 %348 = fadd float %347, %340 %349 = fmul float %28, %154 %350 = fadd float %349, %342 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %107, float %108, float %109, float %110) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %124, float %126, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %115, float %116, float %321, float %322) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %344, float %346, float %348, float %350) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %295, float %305, float %315, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %300, float %310, float %320, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %344, float %346, float %348, float %350) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc0490fdb ; 7E0202FF C0490FDB v_mov_b32_e32 v2, 0x40c90fdb ; 7E0402FF 40C90FDB v_mov_b32_e32 v3, 0x3e22f983 ; 7E0602FF 3E22F983 v_mov_b32_e32 v4, 0x3c008081 ; 7E0802FF 3C008081 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_mov_b32_e32 v6, 1.0 ; 7E0C02F2 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[44:47], s[2:3], 0x0 ; C0960300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[48:51], s[8:9], 0xc ; C098090C s_load_dwordx4 s[52:55], s[8:9], 0x10 ; C09A0910 s_load_dwordx4 s[56:59], s[8:9], 0x14 ; C09C0914 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s23, s[44:47], 0x1a ; C20BAD1A buffer_load_format_xyzw v[7:10], v0, s[0:3], 0 idxen ; E00C2000 80000700 buffer_load_format_xyzw v[11:14], v0, s[4:7], 0 idxen ; E00C2000 80010B00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[14:17], v0, s[12:15], 0 idxen ; E00C2000 80030E00 s_buffer_load_dword s5, s[44:47], 0x1b ; C202AD1B s_buffer_load_dword s39, s[44:47], 0x1c ; C213AD1C s_buffer_load_dword s60, s[44:47], 0x1d ; C21E2D1D s_buffer_load_dword s31, s[44:47], 0x1e ; C20FAD1E s_buffer_load_dword s36, s[44:47], 0x14 ; C2122D14 s_buffer_load_dword s61, s[44:47], 0x15 ; C21EAD15 s_buffer_load_dword s29, s[44:47], 0x16 ; C20EAD16 s_buffer_load_dword s40, s[44:47], 0x18 ; C2142D18 s_buffer_load_dword s62, s[44:47], 0x19 ; C21F2D19 s_buffer_load_dword s6, s[44:47], 0x1f ; C2032D1F s_buffer_load_dword s41, s[44:47], 0x20 ; C214AD20 s_buffer_load_dword s63, s[44:47], 0x21 ; C21FAD21 s_buffer_load_dword s35, s[44:47], 0x22 ; C211AD22 s_buffer_load_dword s7, s[44:47], 0x23 ; C203AD23 s_buffer_load_dword s42, s[44:47], 0x24 ; C2152D24 s_buffer_load_dword s64, s[44:47], 0x25 ; C2202D25 s_buffer_load_dword s37, s[44:47], 0x26 ; C212AD26 s_buffer_load_dword s9, s[44:47], 0x27 ; C204AD27 s_buffer_load_dword s3, s[44:47], 0x28 ; C201AD28 s_buffer_load_dword s17, s[44:47], 0x3d ; C208AD3D s_buffer_load_dword s65, s[44:47], 0x42 ; C220AD42 s_buffer_load_dword s0, s[44:47], 0x45 ; C2002D45 s_buffer_load_dword s66, s[44:47], 0x46 ; C2212D46 s_buffer_load_dword s67, s[44:47], 0x47 ; C221AD47 s_buffer_load_dword s4, s[44:47], 0x49 ; C2022D49 s_buffer_load_dword s43, s[44:47], 0x4a ; C215AD4A s_buffer_load_dword s27, s[44:47], 0x4b ; C20DAD4B buffer_load_format_xyzw v[18:21], v0, s[48:51], 0 idxen ; E00C2000 800C1200 s_buffer_load_dword s48, s[44:47], 0x4c ; C2182D4C s_buffer_load_dword s49, s[44:47], 0x4d ; C218AD4D s_buffer_load_dword s2, s[44:47], 0x30 ; C2012D30 s_buffer_load_dword s1, s[44:47], 0x31 ; C200AD31 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mov_b32_e32 v22, s0 ; 7E2C0200 s_buffer_load_dword s0, s[44:47], 0x32 ; C2002D32 buffer_load_format_xyzw v[23:26], v0, s[52:55], 0 idxen ; E00C2000 800D1700 s_buffer_load_dword s10, s[44:47], 0x34 ; C2052D34 s_buffer_load_dword s11, s[44:47], 0x35 ; C205AD35 s_buffer_load_dword s12, s[44:47], 0x36 ; C2062D36 s_buffer_load_dword s13, s[44:47], 0x38 ; C206AD38 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v25, s4 ; 7E320204 s_buffer_load_dword s14, s[44:47], 0x39 ; C2072D39 buffer_load_format_xyzw v[26:29], v0, s[56:59], 0 idxen ; E00C2000 800E1A00 s_buffer_load_dword s15, s[44:47], 0x3a ; C207AD3A s_buffer_load_dword s38, s[44:47], 0x3c ; C2132D3C s_buffer_load_dword s4, s[44:47], 0xf ; C2022D0F s_buffer_load_dword s16, s[44:47], 0x10 ; C2082D10 s_buffer_load_dword s18, s[44:47], 0x11 ; C2092D11 s_buffer_load_dword s19, s[44:47], 0x12 ; C209AD12 s_buffer_load_dword s8, s[44:47], 0x13 ; C2042D13 v_mov_b32_e32 v0, s13 ; 7E00020D v_add_f32_e32 v0, s10, v0 ; 0600000A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v28, s14 ; 7E38020E v_add_f32_e32 v28, s11, v28 ; 0638380B s_buffer_load_dword s10, s[44:47], 0x29 ; C2052D29 v_mov_b32_e32 v29, s15 ; 7E3A020F v_add_f32_e32 v29, s12, v29 ; 063A3A0C s_buffer_load_dword s11, s[44:47], 0x2a ; C205AD2A s_buffer_load_dword s12, s[44:47], 0x2c ; C2062D2C v_mov_b32_e32 v30, s16 ; 7E3C0210 s_buffer_load_dword s13, s[44:47], 0x2d ; C206AD2D v_mov_b32_e32 v31, s18 ; 7E3E0212 s_buffer_load_dword s14, s[44:47], 0x2e ; C2072D2E v_mov_b32_e32 v32, s19 ; 7E400213 s_buffer_load_dword s28, s[44:47], 0x0 ; C20E2D00 s_buffer_load_dword s26, s[44:47], 0x1 ; C20D2D01 s_buffer_load_dword s25, s[44:47], 0x2 ; C20CAD02 s_buffer_load_dword s24, s[44:47], 0x3 ; C20C2D03 s_buffer_load_dword s34, s[44:47], 0x4 ; C2112D04 s_buffer_load_dword s33, s[44:47], 0x5 ; C210AD05 s_buffer_load_dword s32, s[44:47], 0x6 ; C2102D06 s_buffer_load_dword s30, s[44:47], 0x7 ; C20F2D07 s_buffer_load_dword s15, s[44:47], 0x8 ; C207AD08 s_buffer_load_dword s16, s[44:47], 0x9 ; C2082D09 s_buffer_load_dword s50, s[44:47], 0x4e ; C2192D4E s_buffer_load_dword s51, s[44:47], 0x4f ; C219AD4F s_buffer_load_dword s18, s[44:47], 0xa ; C2092D0A s_buffer_load_dword s19, s[44:47], 0xb ; C209AD0B s_buffer_load_dword s20, s[44:47], 0xc ; C20A2D0C s_buffer_load_dword s21, s[44:47], 0xd ; C20AAD0D s_buffer_load_dword s22, s[44:47], 0xe ; C20B2D0E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v33, s50 ; 7E420232 v_mov_b32_e32 v34, s51 ; 7E440233 v_mul_f32_e32 v35, s60, v8 ; 1046103C v_mad_f32 v35, s62, v7, v35 ; D2820023 048E0E3E v_mad_f32 v35, s63, v9, v35 ; D2820023 048E123F v_mad_f32 v35, s64, v10, v35 ; D2820023 048E1440 v_subrev_f32_e32 v36, s61, v35 ; 0A48463D v_mad_f32 v22, v36, s65, v22 ; D2820016 04588324 v_mad_f32 v26, s48, v26, v34 ; D282001A 048A3430 v_mad_f32 v27, s49, v27, v33 ; D282001B 04863631 v_mad_f32 v25, v36, s67, v25 ; D2820019 04648724 v_add_f32_e32 v22, 0.5, v22 ; 062C2CF0 v_floor_f32_e32 v33, v22 ; 7E424916 v_subrev_f32_e32 v22, v33, v22 ; 0A2C2D21 v_mad_f32 v22, v22, v2, v1 ; D2820016 04060516 v_mul_f32_e32 v22, 0x3e22f983, v22 ; 102C2CFF 3E22F983 v_fract_f32_e32 v22, v22 ; 7E2C4116 v_sin_f32_e32 v22, v22 ; 7E2C6B16 v_mul_f32_e32 v22, s66, v22 ; 102C2C42 exp 15, 32, 0, 0, 0, v18, v19, v20, v21 ; F800020F 15141312 exp 15, 33, 0, 0, 0, v26, v27, v5, v5 ; F800021F 05051B1A s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v21, 0, v23 ; 102A2E80 exp 15, 34, 0, 0, 0, v23, v24, v21, v21 ; F800022F 15151817 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v21, 0.5, v25 ; 062A32F0 v_floor_f32_e32 v23, v21 ; 7E2E4915 v_subrev_f32_e32 v21, v23, v21 ; 0A2A2B17 v_mad_f32 v21, v21, v2, v1 ; D2820015 04060515 v_mul_f32_e32 v21, 0x3e22f983, v21 ; 102A2AFF 3E22F983 v_fract_f32_e32 v21, v21 ; 7E2A4115 v_sin_f32_e32 v21, v21 ; 7E2A6B15 v_mul_f32_e32 v21, s43, v21 ; 102A2A2B v_mul_f32_e32 v21, v21, v22 ; 102A2D15 v_mad_f32 v3, v21, v3, 0.5 ; D2820003 03C20715 v_floor_f32_e32 v21, v3 ; 7E2A4903 v_subrev_f32_e32 v3, v21, v3 ; 0A060715 v_mad_f32 v1, v3, v2, v1 ; D2820001 04060503 v_mul_f32_e32 v2, s39, v8 ; 10041027 v_mad_f32 v2, s40, v7, v2 ; D2820002 040A0E28 v_mad_f32 v2, s41, v9, v2 ; D2820002 040A1229 v_mad_f32 v2, s42, v10, v2 ; D2820002 040A142A v_subrev_f32_e32 v3, s36, v2 ; 0A060424 v_subrev_f32_e32 v21, v0, v3 ; 0A2A0700 v_mul_f32_e32 v21, s38, v21 ; 102A2A26 v_subrev_f32_e32 v22, v28, v36 ; 0A2C491C v_mad_f32 v21, s17, v22, v21 ; D2820015 04562C11 v_mul_f32_e32 v22, s31, v8 ; 102C101F v_mad_f32 v22, s23, v7, v22 ; D2820016 045A0E17 v_mad_f32 v22, s35, v9, v22 ; D2820016 045A1223 v_mad_f32 v22, s37, v10, v22 ; D2820016 045A1425 v_subrev_f32_e32 v23, s29, v22 ; 0A2E2C1D v_subrev_f32_e32 v24, v29, v23 ; 0A302F1D v_mad_f32 v21, s27, v24, v21 ; D2820015 0456301B v_mad_f32 v0, s38, v21, v0 ; D2820000 04022A26 v_mad_f32 v24, s17, v21, v28 ; D2820018 04722A11 v_mad_f32 v21, s27, v21, v29 ; D2820015 04762A1B v_subrev_f32_e32 v25, v24, v36 ; 0A324918 v_subrev_f32_e32 v26, v21, v23 ; 0A342F15 v_mul_f32_e32 v27, s27, v25 ; 1036321B v_mad_f32 v27, s17, v26, -v27 ; D282001B 846E3411 v_mul_f32_e32 v1, 0x3e22f983, v1 ; 100202FF 3E22F983 v_fract_f32_e32 v1, v1 ; 7E024101 v_sin_f32_e32 v28, v1 ; 7E386B01 v_mad_f32 v27, v28, v27, v0 ; D282001B 0402371C v_subrev_f32_e32 v0, v0, v3 ; 0A000700 v_mul_f32_e32 v29, s38, v26 ; 103A3426 v_mad_f32 v29, s27, v0, -v29 ; D282001D 8476001B v_mad_f32 v24, v28, v29, v24 ; D2820018 04623B1C v_mul_f32_e32 v29, s17, v0 ; 103A0011 v_mad_f32 v29, s38, v25, -v29 ; D282001D 84763226 v_mad_f32 v21, v28, v29, v21 ; D2820015 04563B1C v_cos_f32_e32 v1, v1 ; 7E026D01 v_mad_f32 v0, v0, v1, v27 ; D2820000 046E0300 v_mad_f32 v24, v25, v1, v24 ; D2820018 04620319 v_mad_f32 v1, v26, v1, v21 ; D2820001 0456031A v_subrev_f32_e32 v0, v3, v0 ; 0A000103 v_subrev_f32_e32 v3, v36, v24 ; 0A063124 v_subrev_f32_e32 v1, v23, v1 ; 0A020317 v_mad_f32 v0, v18, v0, v2 ; D2820000 040A0112 v_mad_f32 v2, v19, v3, v35 ; D2820002 048E0713 v_mad_f32 v1, v20, v1, v22 ; D2820001 045A0314 v_mul_f32_e32 v3, s6, v8 ; 10061006 v_mad_f32 v3, s5, v7, v3 ; D2820003 040E0E05 v_mad_f32 v3, s7, v9, v3 ; D2820003 040E1207 v_mad_f32 v3, s9, v10, v3 ; D2820003 040E1409 v_mad_f32 v7, v12, v4, -1.0 ; D2820007 03CE090C v_mad_f32 v8, v13, v4, -1.0 ; D2820008 03CE090D v_mad_f32 v9, v11, v4, -1.0 ; D2820009 03CE090B v_mad_f32 v10, v14, v4, -1.0 ; D282000A 03CE090E v_mad_f32 v11, v15, v4, -1.0 ; D282000B 03CE090F v_mad_f32 v12, v16, v4, -1.0 ; D282000C 03CE0910 v_mad_f32 v4, v17, v4, -1.0 ; D2820004 03CE0911 v_mul_f32_e32 v13, s34, v2 ; 101A0422 v_mad_f32 v13, s28, v0, v13 ; D282000D 0436001C v_mul_f32_e32 v14, s33, v2 ; 101C0421 v_mad_f32 v14, s26, v0, v14 ; D282000E 043A001A v_mul_f32_e32 v15, s32, v2 ; 101E0420 v_mad_f32 v15, s25, v0, v15 ; D282000F 043E0019 v_mul_f32_e32 v16, s30, v2 ; 1020041E v_mad_f32 v16, s24, v0, v16 ; D2820010 04420018 v_mad_f32 v0, -s8, v0, v30 ; D2820000 247A0008 v_mad_f32 v2, -s8, v2, v31 ; D2820002 247E0408 v_mul_f32_e32 v17, s12, v2 ; 1022040C v_mul_f32_e32 v18, s13, v2 ; 1024040D v_mul_f32_e32 v2, s14, v2 ; 1004040E v_mad_f32 v17, s3, v0, v17 ; D2820011 04460003 v_mad_f32 v18, s10, v0, v18 ; D2820012 044A000A v_mad_f32 v0, s11, v0, v2 ; D2820000 040A000B v_mad_f32 v2, -s8, v1, v32 ; D2820002 24820208 v_mad_f32 v13, s15, v1, v13 ; D282000D 0436020F v_mad_f32 v14, s16, v1, v14 ; D282000E 043A0210 v_mad_f32 v15, s18, v1, v15 ; D282000F 043E0212 v_mad_f32 v1, s19, v1, v16 ; D2820001 04420213 v_mad_f32 v13, s20, v3, v13 ; D282000D 04360614 v_mad_f32 v14, s21, v3, v14 ; D282000E 043A0615 v_mad_f32 v15, s22, v3, v15 ; D282000F 043E0616 v_mad_f32 v1, s4, v3, v1 ; D2820001 04060604 v_mul_f32_e32 v3, v12, v7 ; 10060F0C v_mad_f32 v3, v11, v8, -v3 ; D2820003 840E110B v_mul_f32_e32 v8, v10, v8 ; 1010110A v_mad_f32 v8, v12, v9, -v8 ; D2820008 8422130C v_mul_f32_e32 v9, v11, v9 ; 1012130B v_mad_f32 v7, v10, v7, -v9 ; D2820007 84260F0A v_mul_f32_e32 v3, v3, v4 ; 10060903 v_mul_f32_e32 v8, v8, v4 ; 10100908 v_mul_f32_e32 v7, v7, v4 ; 100E0907 v_mul_f32_e32 v9, v7, v11 ; 10121707 v_mad_f32 v9, v8, v12, -v9 ; D2820009 84261908 v_mul_f32_e32 v16, v3, v12 ; 10201903 v_mad_f32 v16, v7, v10, -v16 ; D2820010 84421507 v_mul_f32_e32 v19, v8, v10 ; 10261508 v_mad_f32 v19, v3, v11, -v19 ; D2820013 844E1703 v_mul_f32_e32 v9, v9, v4 ; 10120909 v_mad_f32 v17, s2, v2, v17 ; D2820011 04460402 v_mul_f32_e32 v20, s2, v3 ; 10280602 v_mul_f32_e32 v3, v17, v3 ; 10060711 v_mul_f32_e32 v21, s2, v10 ; 102A1402 v_mul_f32_e32 v10, v17, v10 ; 10141511 v_mul_f32_e32 v17, v17, v9 ; 10221311 v_mul_f32_e32 v9, s2, v9 ; 10121202 v_mad_f32 v21, v11, s1, v21 ; D2820015 0454030B v_mad_f32 v20, v8, s1, v20 ; D2820014 04500308 v_mul_f32_e32 v16, v16, v4 ; 10200910 v_mad_f32 v9, v16, s1, v9 ; D2820009 04240310 v_mad_f32 v18, s1, v2, v18 ; D2820012 044A0401 v_mad_f32 v16, v16, v18, v17 ; D2820010 04462510 v_mad_f32 v3, v8, v18, v3 ; D2820003 040E2508 v_mad_f32 v8, v11, v18, v10 ; D2820008 042A250B v_mul_f32_e32 v4, v19, v4 ; 10080913 v_mad_f32 v10, v7, s0, v20 ; D282000A 04500107 v_mad_f32 v0, s0, v2, v0 ; D2820000 04020400 exp 15, 35, 0, 0, 0, v13, v14, v15, v1 ; F800023F 010F0E0D v_mad_f32 v2, v4, v0, v16 ; D2820002 04420104 v_mad_f32 v3, v7, v0, v3 ; D2820003 040E0107 v_mad_f32 v0, v12, v0, v8 ; D2820000 0422010C exp 15, 36, 0, 0, 0, v2, v3, v0, v6 ; F800024F 06000302 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v4, s0, v9 ; D2820000 04240104 v_mad_f32 v2, v12, s0, v21 ; D2820002 0454010C exp 15, 37, 0, 0, 0, v0, v10, v2, v5 ; F800025F 05020A00 exp 15, 12, 0, 1, 0, v13, v14, v15, v1 ; F80008CF 010F0E0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 40 Code Size: 1480 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..7] DCL TEMP[0..1] IMM[0] FLT32 { -0.3333, 1.0000, 0.0039, 0.0000} 0: TEX TEMP[0], IN[0], SAMP[1], 2D 1: MOV TEMP[1].xy, IMM[0] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[6].yyyy, TEMP[1].xxxx 3: KILL_IF TEMP[0] 4: ADD TEMP[0].xyz, TEMP[1].yyyy, -CONST[0] 5: TEX TEMP[1], IN[0], SAMP[0], 2D 6: MUL TEMP[0].xyz, TEMP[0], TEMP[1] 7: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 8: MOV TEMP[1].xyz, CONST[0] 9: MAD OUT[0].xyz, TEMP[0], CONST[7], TEMP[1] 10: MUL OUT[0].w, IMM[0].zzzz, IN[1].wwww 11: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %35 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %40 = bitcast <8 x i32> addrspace(2)* %39 to <32 x i8> addrspace(2)* %41 = load <32 x i8>, <32 x i8> addrspace(2)* %40, align 32, !tbaa !0 %42 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %43 = bitcast <4 x i32> addrspace(2)* %42 to <16 x i8> addrspace(2)* %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %48 = bitcast float %45 to i32 %49 = bitcast float %46 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %41, <16 x i8> %44, i32 2) %53 = extractelement <4 x float> %52, i32 0 %54 = fmul float %53, %31 %55 = fadd float %54, 0xBFD554C980000000 %56 = fmul float %53, %31 %57 = fadd float %56, 0xBFD554C980000000 %58 = fmul float %53, %31 %59 = fadd float %58, 0xBFD554C980000000 %60 = fmul float %53, %31 %61 = fadd float %60, 0xBFD554C980000000 %62 = fcmp olt float %55, 0.000000e+00 %63 = fcmp olt float %57, 0.000000e+00 %64 = fcmp olt float %59, 0.000000e+00 %65 = fcmp olt float %61, 0.000000e+00 %66 = or i1 %65, %64 %67 = or i1 %66, %63 %68 = or i1 %67, %62 %69 = select i1 %68, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %69) %70 = fsub float 1.000000e+00, %24 %71 = fsub float 1.000000e+00, %25 %72 = fsub float 1.000000e+00, %26 %73 = bitcast float %45 to i32 %74 = bitcast float %46 to i32 %75 = insertelement <2 x i32> undef, i32 %73, i32 0 %76 = insertelement <2 x i32> %75, i32 %74, i32 1 %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %76, <32 x i8> %36, <16 x i8> %38, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = fmul float %70, %78 %82 = fmul float %71, %79 %83 = fmul float %72, %80 %84 = fmul float %81, %30 %85 = fadd float %84, %27 %86 = fmul float %82, %30 %87 = fadd float %86, %28 %88 = fmul float %83, %30 %89 = fadd float %88, %29 %90 = fmul float %85, %32 %91 = fadd float %90, %24 %92 = fmul float %87, %33 %93 = fadd float %92, %25 %94 = fmul float %89, %34 %95 = fadd float %94, %26 %96 = fmul float %47, 3.906250e-03 %97 = call i32 @llvm.SI.packf16(float %91, float %93) %98 = bitcast i32 %97 to float %99 = call i32 @llvm.SI.packf16(float %95, float %96) %100 = bitcast i32 %99 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %98, float %100, float %98, float %100) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800100 00640102 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 v_mov_b32_e32 v4, 0xbeaaa64c ; 7E0802FF BEAAA64C s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_buffer_load_dword s5, s[0:3], 0x1d ; C202811D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v1, s8, v1, v4 ; D2820001 04120208 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 s_buffer_load_dword s9, s[0:3], 0x13 ; C2048113 s_buffer_load_dword s10, s[0:3], 0x10 ; C2050110 s_buffer_load_dword s11, s[0:3], 0x11 ; C2058111 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800700 00640102 s_buffer_load_dword s0, s[0:3], 0x1e ; C200011E v_sub_f32_e64 v4, 1.0, s6 ; D2080004 00000CF2 v_sub_f32_e64 v5, 1.0, s7 ; D2080005 00000EF2 v_sub_f32_e64 v6, 1.0, s8 ; D2080006 000010F2 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mov_b32_e32 v4, s10 ; 7E08020A v_mad_f32 v1, s9, v1, v4 ; D2820001 04120209 v_mov_b32_e32 v4, s11 ; 7E08020B v_mad_f32 v2, s9, v2, v4 ; D2820002 04120409 v_mov_b32_e32 v4, s24 ; 7E080218 v_mad_f32 v3, s9, v3, v4 ; D2820003 04120609 v_mov_b32_e32 v4, s6 ; 7E080206 v_mad_f32 v1, s4, v1, v4 ; D2820001 04120204 v_mov_b32_e32 v4, s7 ; 7E080207 v_mad_f32 v2, s5, v2, v4 ; D2820002 04120405 v_mov_b32_e32 v4, s8 ; 7E080208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s0, v3, v4 ; D2820003 04120600 v_mul_f32_e32 v0, 0x3b800000, v0 ; 100000FF 3B800000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 324 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2].xy, TEXCOORD[0] DCL OUT[3], TEXCOORD[1] DCL OUT[4].xyz, TEXCOORD[4] DCL OUT[5], TEXCOORD[5] DCL OUT[6], TEXCOORD[6] DCL OUT[7], TEXCOORD[7] DCL CONST[0..20] DCL TEMP[0..6] IMM[0] FLT32 { 0.0078, -1.0000, 0.5000, 0.1592} IMM[1] FLT32 { 6.2832, -3.1416, 1.0000, 0.0000} 0: MUL TEMP[0], CONST[7], IN[0].yyyy 1: MAD TEMP[0], CONST[6], IN[0].xxxx, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[9], IN[0].wwww, TEMP[0] 4: ADD TEMP[1].xyz, TEMP[0], -CONST[5] 5: MOV TEMP[2].yw, CONST[17] 6: MAD TEMP[1].w, CONST[16].zzzz, TEMP[1].yyyy, TEMP[2].yyyy 7: ADD TEMP[1].w, TEMP[1].wwww, IMM[0].zzzz 8: FRC TEMP[1].w, TEMP[1].wwww 9: MAD TEMP[1].w, TEMP[1].wwww, IMM[1].xxxx, IMM[1].yyyy 10: SCS TEMP[3].y, TEMP[1].wwww 11: MUL TEMP[1].w, TEMP[3].yyyy, CONST[17].zzzz 12: MAD TEMP[2].x, TEMP[2].wwww, TEMP[1].yyyy, CONST[18].yyyy 13: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz 14: FRC TEMP[2].x, TEMP[2].xxxx 15: MAD TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx, IMM[1].yyyy 16: SCS TEMP[3].y, TEMP[2].xxxx 17: MUL TEMP[2].x, TEMP[3].yyyy, CONST[18].zzzz 18: MUL TEMP[1].w, TEMP[1].wwww, TEMP[2].xxxx 19: MAD TEMP[1].w, TEMP[1].wwww, IMM[0].wwww, IMM[0].zzzz 20: FRC TEMP[1].w, TEMP[1].wwww 21: MAD TEMP[1].w, TEMP[1].wwww, IMM[1].xxxx, IMM[1].yyyy 22: SCS TEMP[2].xy, TEMP[1].wwww 23: MOV TEMP[3].xyz, CONST[13] 24: ADD TEMP[3].xyz, TEMP[3], CONST[14] 25: ADD TEMP[4].xyz, TEMP[1], -TEMP[3] 26: MOV TEMP[5].yz, CONST[15].xxyw 27: MOV TEMP[5].x, CONST[18].wwww 28: DP3 TEMP[1].w, TEMP[5].yzxw, TEMP[4] 29: MAD TEMP[3].xyz, TEMP[5].yzxw, TEMP[1].wwww, TEMP[3] 30: ADD TEMP[4].xyz, TEMP[1], -TEMP[3] 31: MUL TEMP[6].xyz, TEMP[4].yzxw, TEMP[5] 32: MAD TEMP[5].xyz, TEMP[5].zxyw, TEMP[4].zxyw, -TEMP[6] 33: MUL TEMP[2].yzw, TEMP[2].yyyy, TEMP[5].xxyz 34: MAD TEMP[2].xyz, TEMP[4], TEMP[2].xxxx, TEMP[2].yzww 35: ADD TEMP[2].xyz, TEMP[2], TEMP[3] 36: ADD TEMP[1].xyz, -TEMP[1], TEMP[2] 37: MAD TEMP[0].xyz, IN[3], TEMP[1], TEMP[0] 38: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 39: MAD TEMP[1], CONST[0], TEMP[0].xxxx, TEMP[1] 40: MAD TEMP[1], CONST[2], TEMP[0].zzzz, TEMP[1] 41: MAD OUT[0], CONST[3], TEMP[0].wwww, TEMP[1] 42: MAD OUT[2].xy, IN[5], CONST[19], CONST[19].wzzw 43: MAD TEMP[1].xyz, TEMP[0], -CONST[4].wwww, CONST[4] 44: MOV OUT[7], TEMP[0] 45: MUL TEMP[0].xyz, TEMP[1].yyyy, CONST[11] 46: MAD TEMP[0].xyz, CONST[10], TEMP[1].xxxx, TEMP[0] 47: MAD TEMP[0].xyz, CONST[12], TEMP[1].zzzz, TEMP[0] 48: MAD TEMP[1].xyz, IN[1].yzxw, IMM[0].xxxx, IMM[0].yyyy 49: MAD TEMP[2], IN[2], IMM[0].xxxx, IMM[0].yyyy 50: MUL TEMP[3].xyz, TEMP[1], TEMP[2].zxyw 51: MAD TEMP[1].xyz, TEMP[2].yzxw, TEMP[1].yzxw, -TEMP[3] 52: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 53: MUL TEMP[3].xyz, TEMP[2].yzxw, TEMP[1].zxyw 54: MAD TEMP[3].xyz, TEMP[1].yzxw, TEMP[2].zxyw, -TEMP[3] 55: MUL TEMP[3].xyz, TEMP[2].wwww, TEMP[3] 56: DP3 OUT[6].x, TEMP[3], TEMP[0] 57: DP3 OUT[6].y, TEMP[1], TEMP[0] 58: DP3 OUT[6].z, TEMP[2], TEMP[0] 59: MOV TEMP[0].xyz, CONST[20] 60: MUL TEMP[4].xyz, TEMP[0].yyyy, CONST[11] 61: MAD TEMP[0].xyw, CONST[10].xyzz, TEMP[0].xxxx, TEMP[4].xyzz 62: MAD TEMP[0].xyz, CONST[12], TEMP[0].zzzz, TEMP[0].xyww 63: DP3 OUT[4].x, TEMP[3], TEMP[0] 64: DP3 OUT[4].y, TEMP[1], TEMP[0] 65: DP3 OUT[4].z, TEMP[2], TEMP[0] 66: MOV OUT[1], IN[3] 67: MUL OUT[3], IMM[1].zzww, IN[4].xyxx 68: MOV OUT[5], IMM[1].wwww 69: MOV OUT[6].w, -IMM[0].yyyy 70: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = add i32 %5, %7 %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %85) %87 = extractelement <4 x float> %86, i32 0 %88 = extractelement <4 x float> %86, i32 1 %89 = extractelement <4 x float> %86, i32 2 %90 = extractelement <4 x float> %86, i32 3 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = add i32 %5, %7 %94 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %93) %95 = extractelement <4 x float> %94, i32 0 %96 = extractelement <4 x float> %94, i32 1 %97 = extractelement <4 x float> %94, i32 2 %98 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !tbaa !0 %100 = add i32 %5, %7 %101 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %99, i32 0, i32 %100) %102 = extractelement <4 x float> %101, i32 0 %103 = extractelement <4 x float> %101, i32 1 %104 = extractelement <4 x float> %101, i32 2 %105 = extractelement <4 x float> %101, i32 3 %106 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !tbaa !0 %108 = add i32 %5, %7 %109 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %107, i32 0, i32 %108) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = extractelement <4 x float> %109, i32 3 %114 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %115 = load <16 x i8>, <16 x i8> addrspace(2)* %114, align 16, !tbaa !0 %116 = add i32 %5, %7 %117 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %115, i32 0, i32 %116) %118 = extractelement <4 x float> %117, i32 0 %119 = extractelement <4 x float> %117, i32 1 %120 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %121 = load <16 x i8>, <16 x i8> addrspace(2)* %120, align 16, !tbaa !0 %122 = add i32 %5, %7 %123 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %121, i32 0, i32 %122) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 1 %126 = fmul float %40, %88 %127 = fmul float %41, %88 %128 = fmul float %42, %88 %129 = fmul float %43, %88 %130 = fmul float %36, %87 %131 = fadd float %130, %126 %132 = fmul float %37, %87 %133 = fadd float %132, %127 %134 = fmul float %38, %87 %135 = fadd float %134, %128 %136 = fmul float %39, %87 %137 = fadd float %136, %129 %138 = fmul float %44, %89 %139 = fadd float %138, %131 %140 = fmul float %45, %89 %141 = fadd float %140, %133 %142 = fmul float %46, %89 %143 = fadd float %142, %135 %144 = fmul float %47, %89 %145 = fadd float %144, %137 %146 = fmul float %48, %90 %147 = fadd float %146, %139 %148 = fmul float %49, %90 %149 = fadd float %148, %141 %150 = fmul float %50, %90 %151 = fadd float %150, %143 %152 = fmul float %51, %90 %153 = fadd float %152, %145 %154 = fsub float %147, %33 %155 = fsub float %149, %34 %156 = fsub float %151, %35 %157 = fmul float %69, %155 %158 = fadd float %157, %70 %159 = fadd float %158, 5.000000e-01 %160 = call float @llvm.AMDIL.fraction.(float %159) %161 = fmul float %160, 0x401921FB60000000 %162 = fadd float %161, 0xC00921FB60000000 %163 = call float @llvm.sin.f32(float %162) %164 = fmul float %163, %71 %165 = fmul float %72, %155 %166 = fadd float %165, %73 %167 = fadd float %166, 5.000000e-01 %168 = call float @llvm.AMDIL.fraction.(float %167) %169 = fmul float %168, 0x401921FB60000000 %170 = fadd float %169, 0xC00921FB60000000 %171 = call float @llvm.sin.f32(float %170) %172 = fmul float %171, %74 %173 = fmul float %164, %172 %174 = fmul float %173, 0x3FC45F3060000000 %175 = fadd float %174, 5.000000e-01 %176 = call float @llvm.AMDIL.fraction.(float %175) %177 = fmul float %176, 0x401921FB60000000 %178 = fadd float %177, 0xC00921FB60000000 %179 = call float @llvm.cos.f32(float %178) %180 = call float @llvm.sin.f32(float %178) %181 = fadd float %61, %64 %182 = fadd float %62, %65 %183 = fadd float %63, %66 %184 = fsub float %154, %181 %185 = fsub float %155, %182 %186 = fsub float %156, %183 %187 = fmul float %67, %184 %188 = fmul float %68, %185 %189 = fadd float %188, %187 %190 = fmul float %75, %186 %191 = fadd float %189, %190 %192 = fmul float %67, %191 %193 = fadd float %192, %181 %194 = fmul float %68, %191 %195 = fadd float %194, %182 %196 = fmul float %75, %191 %197 = fadd float %196, %183 %198 = fsub float %154, %193 %199 = fsub float %155, %195 %200 = fsub float %156, %197 %201 = fmul float %199, %75 %202 = fmul float %200, %67 %203 = fmul float %198, %68 %204 = fmul float %68, %200 %205 = fsub float %204, %201 %206 = fmul float %75, %198 %207 = fsub float %206, %202 %208 = fmul float %67, %199 %209 = fsub float %208, %203 %210 = fmul float %180, %205 %211 = fmul float %180, %207 %212 = fmul float %180, %209 %213 = fmul float %198, %179 %214 = fadd float %213, %210 %215 = fmul float %199, %179 %216 = fadd float %215, %211 %217 = fmul float %200, %179 %218 = fadd float %217, %212 %219 = fadd float %214, %193 %220 = fadd float %216, %195 %221 = fadd float %218, %197 %222 = fsub float %219, %154 %223 = fsub float %220, %155 %224 = fsub float %221, %156 %225 = fmul float %110, %222 %226 = fadd float %225, %147 %227 = fmul float %111, %223 %228 = fadd float %227, %149 %229 = fmul float %112, %224 %230 = fadd float %229, %151 %231 = fmul float %228, %17 %232 = fmul float %228, %18 %233 = fmul float %228, %19 %234 = fmul float %228, %20 %235 = fmul float %13, %226 %236 = fadd float %235, %231 %237 = fmul float %14, %226 %238 = fadd float %237, %232 %239 = fmul float %15, %226 %240 = fadd float %239, %233 %241 = fmul float %16, %226 %242 = fadd float %241, %234 %243 = fmul float %21, %230 %244 = fadd float %243, %236 %245 = fmul float %22, %230 %246 = fadd float %245, %238 %247 = fmul float %23, %230 %248 = fadd float %247, %240 %249 = fmul float %24, %230 %250 = fadd float %249, %242 %251 = fmul float %25, %153 %252 = fadd float %251, %244 %253 = fmul float %26, %153 %254 = fadd float %253, %246 %255 = fmul float %27, %153 %256 = fadd float %255, %248 %257 = fmul float %28, %153 %258 = fadd float %257, %250 %259 = fmul float %124, %76 %260 = fadd float %259, %79 %261 = fmul float %125, %77 %262 = fadd float %261, %78 %263 = fmul float %32, %226 %264 = fsub float %29, %263 %265 = fmul float %32, %228 %266 = fsub float %30, %265 %267 = fmul float %32, %230 %268 = fsub float %31, %267 %269 = fmul float %266, %55 %270 = fmul float %266, %56 %271 = fmul float %266, %57 %272 = fmul float %52, %264 %273 = fadd float %272, %269 %274 = fmul float %53, %264 %275 = fadd float %274, %270 %276 = fmul float %54, %264 %277 = fadd float %276, %271 %278 = fmul float %58, %268 %279 = fadd float %278, %273 %280 = fmul float %59, %268 %281 = fadd float %280, %275 %282 = fmul float %60, %268 %283 = fadd float %282, %277 %284 = fmul float %96, 0x3F80101020000000 %285 = fadd float %284, -1.000000e+00 %286 = fmul float %97, 0x3F80101020000000 %287 = fadd float %286, -1.000000e+00 %288 = fmul float %95, 0x3F80101020000000 %289 = fadd float %288, -1.000000e+00 %290 = fmul float %102, 0x3F80101020000000 %291 = fadd float %290, -1.000000e+00 %292 = fmul float %103, 0x3F80101020000000 %293 = fadd float %292, -1.000000e+00 %294 = fmul float %104, 0x3F80101020000000 %295 = fadd float %294, -1.000000e+00 %296 = fmul float %105, 0x3F80101020000000 %297 = fadd float %296, -1.000000e+00 %298 = fmul float %285, %295 %299 = fmul float %287, %291 %300 = fmul float %289, %293 %301 = fmul float %293, %287 %302 = fsub float %301, %298 %303 = fmul float %295, %289 %304 = fsub float %303, %299 %305 = fmul float %291, %285 %306 = fsub float %305, %300 %307 = fmul float %297, %302 %308 = fmul float %297, %304 %309 = fmul float %297, %306 %310 = fmul float %293, %309 %311 = fmul float %295, %307 %312 = fmul float %291, %308 %313 = fmul float %308, %295 %314 = fsub float %313, %310 %315 = fmul float %309, %291 %316 = fsub float %315, %311 %317 = fmul float %307, %293 %318 = fsub float %317, %312 %319 = fmul float %297, %314 %320 = fmul float %297, %316 %321 = fmul float %297, %318 %322 = fmul float %319, %279 %323 = fmul float %320, %281 %324 = fadd float %323, %322 %325 = fmul float %321, %283 %326 = fadd float %324, %325 %327 = fmul float %307, %279 %328 = fmul float %308, %281 %329 = fadd float %328, %327 %330 = fmul float %309, %283 %331 = fadd float %329, %330 %332 = fmul float %291, %279 %333 = fmul float %293, %281 %334 = fadd float %333, %332 %335 = fmul float %295, %283 %336 = fadd float %334, %335 %337 = fmul float %81, %55 %338 = fmul float %81, %56 %339 = fmul float %81, %57 %340 = fmul float %52, %80 %341 = fadd float %340, %337 %342 = fmul float %53, %80 %343 = fadd float %342, %338 %344 = fmul float %54, %80 %345 = fadd float %344, %339 %346 = fmul float %58, %82 %347 = fadd float %346, %341 %348 = fmul float %59, %82 %349 = fadd float %348, %343 %350 = fmul float %60, %82 %351 = fadd float %350, %345 %352 = fmul float %319, %347 %353 = fmul float %320, %349 %354 = fadd float %353, %352 %355 = fmul float %321, %351 %356 = fadd float %354, %355 %357 = fmul float %307, %347 %358 = fmul float %308, %349 %359 = fadd float %358, %357 %360 = fmul float %309, %351 %361 = fadd float %359, %360 %362 = fmul float %291, %347 %363 = fmul float %293, %349 %364 = fadd float %363, %362 %365 = fmul float %295, %351 %366 = fadd float %364, %365 %367 = fmul float %118, 0.000000e+00 %368 = fmul float %118, 0.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %110, float %111, float %112, float %113) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %260, float %262, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %118, float %119, float %367, float %368) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %356, float %361, float %366, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %326, float %331, float %336, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %226, float %228, float %230, float %153) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %252, float %254, float %256, float %258) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[40:43], s[8:9], 0x0 ; C0940900 s_load_dwordx4 s[44:47], s[8:9], 0x4 ; C0960904 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0xc0490fdb ; 7E0202FF C0490FDB v_mov_b32_e32 v2, 0x40c90fdb ; 7E0402FF 40C90FDB v_mov_b32_e32 v3, 0x3e22f983 ; 7E0602FF 3E22F983 v_mov_b32_e32 v4, 0x3c008081 ; 7E0802FF 3C008081 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_load_dwordx4 s[48:51], s[8:9], 0x8 ; C0980908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s22, s[12:15], 0x1a ; C20B0D1A s_buffer_load_dword s21, s[12:15], 0x1b ; C20A8D1B s_buffer_load_dword s35, s[12:15], 0x1c ; C2118D1C s_buffer_load_dword s33, s[12:15], 0x1d ; C2108D1D s_buffer_load_dword s25, s[12:15], 0x1e ; C20C8D1E s_buffer_load_dword s10, s[12:15], 0x14 ; C2050D14 s_buffer_load_dword s17, s[12:15], 0x15 ; C2088D15 s_buffer_load_dword s7, s[12:15], 0x16 ; C2038D16 s_buffer_load_dword s38, s[12:15], 0x18 ; C2130D18 s_buffer_load_dword s37, s[12:15], 0x19 ; C2128D19 s_buffer_load_dword s36, s[12:15], 0x1f ; C2120D1F s_buffer_load_dword s34, s[12:15], 0x20 ; C2110D20 s_buffer_load_dword s31, s[12:15], 0x21 ; C20F8D21 s_buffer_load_dword s32, s[12:15], 0x22 ; C2100D22 s_buffer_load_dword s29, s[12:15], 0x23 ; C20E8D23 s_buffer_load_dword s30, s[12:15], 0x24 ; C20F0D24 s_buffer_load_dword s26, s[12:15], 0x25 ; C20D0D25 s_buffer_load_dword s27, s[12:15], 0x26 ; C20D8D26 s_buffer_load_dword s24, s[12:15], 0x27 ; C20C0D27 s_buffer_load_dword s1, s[12:15], 0x28 ; C2008D28 s_buffer_load_dword s5, s[12:15], 0x3d ; C2028D3D s_buffer_load_dword s28, s[12:15], 0x42 ; C20E0D42 s_buffer_load_dword s0, s[12:15], 0x45 ; C2000D45 s_buffer_load_dword s18, s[12:15], 0x46 ; C2090D46 s_buffer_load_dword s23, s[12:15], 0x47 ; C20B8D47 s_buffer_load_dword s11, s[12:15], 0x49 ; C2058D49 s_buffer_load_dword s16, s[12:15], 0x4a ; C2080D4A s_buffer_load_dword s6, s[12:15], 0x4b ; C2030D4B s_buffer_load_dword s52, s[12:15], 0x4c ; C21A0D4C s_buffer_load_dword s53, s[12:15], 0x4d ; C21A8D4D s_buffer_load_dword s2, s[12:15], 0x30 ; C2010D30 s_buffer_load_dword s3, s[12:15], 0x31 ; C2018D31 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s0 ; 7E0C0200 s_buffer_load_dword s4, s[12:15], 0x32 ; C2020D32 s_buffer_load_dword s54, s[12:15], 0x34 ; C21B0D34 s_buffer_load_dword s55, s[12:15], 0x35 ; C21B8D35 v_mov_b32_e32 v7, s11 ; 7E0E020B s_buffer_load_dword s56, s[12:15], 0x36 ; C21C0D36 s_buffer_load_dword s57, s[12:15], 0x38 ; C21C8D38 s_buffer_load_dword s58, s[12:15], 0x39 ; C21D0D39 s_buffer_load_dword s59, s[12:15], 0x3a ; C21D8D3A s_buffer_load_dword s11, s[12:15], 0x3c ; C2058D3C s_buffer_load_dword s0, s[12:15], 0xf ; C2000D0F s_buffer_load_dword s60, s[12:15], 0x4e ; C21E0D4E s_buffer_load_dword s61, s[12:15], 0x4f ; C21E8D4F s_buffer_load_dword s20, s[12:15], 0x50 ; C20A0D50 s_buffer_load_dword s39, s[12:15], 0x51 ; C2138D51 s_buffer_load_dword s19, s[12:15], 0x52 ; C2098D52 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s57 ; 7E100239 v_add_f32_e32 v8, s54, v8 ; 06101036 v_mov_b32_e32 v9, s58 ; 7E12023A v_add_f32_e32 v9, s55, v9 ; 06121237 v_mov_b32_e32 v10, s59 ; 7E14023B v_add_f32_e32 v10, s56, v10 ; 06141438 s_load_dwordx4 s[56:59], s[8:9], 0xc ; C09C090C s_load_dwordx4 s[64:67], s[8:9], 0x10 ; C0A00910 buffer_load_format_xyzw v[11:14], v0, s[40:43], 0 idxen ; E00C2000 800A0B00 buffer_load_format_xyzw v[15:18], v0, s[44:47], 0 idxen ; E00C2000 800B0F00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[18:21], v0, s[48:51], 0 idxen ; E00C2000 800C1200 s_load_dwordx4 s[40:43], s[8:9], 0x14 ; C0940914 buffer_load_format_xyzw v[22:25], v0, s[56:59], 0 idxen ; E00C2000 800E1600 buffer_load_format_xyzw v[26:29], v0, s[64:67], 0 idxen ; E00C2000 80101A00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[28:31], v0, s[40:43], 0 idxen ; E00C2000 800A1C00 v_mov_b32_e32 v0, s61 ; 7E00023D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, s52, v28, v0 ; D2820000 04023834 v_mov_b32_e32 v28, s60 ; 7E38023C v_mad_f32 v28, s53, v29, v28 ; D282001C 04723A35 exp 15, 32, 0, 0, 0, v22, v23, v24, v25 ; F800020F 19181716 exp 15, 33, 0, 0, 0, v0, v28, v5, v5 ; F800021F 05051C00 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, 0, v26 ; 10003480 exp 15, 34, 0, 0, 0, v26, v27, v0, v0 ; F800022F 00001B1A s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s35, v12 ; 10001823 v_mad_f32 v0, s38, v11, v0 ; D2820000 04021626 v_mul_f32_e32 v25, s33, v12 ; 10321821 v_mad_f32 v25, s37, v11, v25 ; D2820019 04661625 v_mul_f32_e32 v26, s25, v12 ; 10341819 v_mad_f32 v26, s22, v11, v26 ; D282001A 046A1616 v_mul_f32_e32 v12, s36, v12 ; 10181824 v_mad_f32 v11, s21, v11, v12 ; D282000B 04321615 v_mad_f32 v0, s34, v13, v0 ; D2820000 04021A22 v_mad_f32 v12, s31, v13, v25 ; D282000C 04661A1F v_mad_f32 v25, s32, v13, v26 ; D2820019 046A1A20 v_mad_f32 v11, s29, v13, v11 ; D282000B 042E1A1D v_mad_f32 v0, s30, v14, v0 ; D2820000 04021C1E v_mad_f32 v12, s26, v14, v12 ; D282000C 04321C1A v_mad_f32 v13, s27, v14, v25 ; D282000D 04661C1B v_mad_f32 v11, s24, v14, v11 ; D282000B 042E1C18 v_subrev_f32_e32 v14, s17, v12 ; 0A1C1811 v_mad_f32 v6, v14, s28, v6 ; D2820006 0418390E v_add_f32_e32 v6, 0.5, v6 ; 060C0CF0 v_floor_f32_e32 v25, v6 ; 7E324906 v_subrev_f32_e32 v6, v25, v6 ; 0A0C0D19 v_mad_f32 v7, v14, s23, v7 ; D2820007 041C2F0E v_add_f32_e32 v7, 0.5, v7 ; 060E0EF0 v_floor_f32_e32 v25, v7 ; 7E324907 v_subrev_f32_e32 v7, v25, v7 ; 0A0E0F19 v_mad_f32 v6, v6, v2, v1 ; D2820006 04060506 v_mul_f32_e32 v6, 0x3e22f983, v6 ; 100C0CFF 3E22F983 v_fract_f32_e32 v6, v6 ; 7E0C4106 v_sin_f32_e32 v6, v6 ; 7E0C6B06 v_mul_f32_e32 v6, s18, v6 ; 100C0C12 v_mad_f32 v7, v7, v2, v1 ; D2820007 04060507 v_mul_f32_e32 v7, 0x3e22f983, v7 ; 100E0EFF 3E22F983 v_fract_f32_e32 v7, v7 ; 7E0E4107 v_sin_f32_e32 v7, v7 ; 7E0E6B07 v_mul_f32_e32 v7, s16, v7 ; 100E0E10 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mad_f32 v3, v6, v3, 0.5 ; D2820003 03C20706 v_floor_f32_e32 v6, v3 ; 7E0C4903 v_subrev_f32_e32 v3, v6, v3 ; 0A060706 v_mad_f32 v1, v3, v2, v1 ; D2820001 04060503 v_subrev_f32_e32 v2, s10, v0 ; 0A04000A v_subrev_f32_e32 v3, v8, v2 ; 0A060508 v_mul_f32_e32 v3, s11, v3 ; 1006060B v_subrev_f32_e32 v6, v9, v14 ; 0A0C1D09 v_mad_f32 v3, s5, v6, v3 ; D2820003 040E0C05 v_subrev_f32_e32 v6, s7, v13 ; 0A0C1A07 v_subrev_f32_e32 v7, v10, v6 ; 0A0E0D0A v_mad_f32 v3, s6, v7, v3 ; D2820003 040E0E06 v_mad_f32 v7, s11, v3, v8 ; D2820007 0422060B v_mad_f32 v8, s5, v3, v9 ; D2820008 04260605 v_mad_f32 v3, s6, v3, v10 ; D2820003 042A0606 v_subrev_f32_e32 v9, v8, v14 ; 0A121D08 v_subrev_f32_e32 v10, v3, v6 ; 0A140D03 v_mul_f32_e32 v25, s6, v9 ; 10321206 v_mad_f32 v25, s5, v10, -v25 ; D2820019 84661405 v_mul_f32_e32 v1, 0x3e22f983, v1 ; 100202FF 3E22F983 v_fract_f32_e32 v1, v1 ; 7E024101 v_sin_f32_e32 v26, v1 ; 7E346B01 v_mad_f32 v25, v26, v25, v7 ; D2820019 041E331A v_subrev_f32_e32 v7, v7, v2 ; 0A0E0507 v_mul_f32_e32 v27, s11, v10 ; 1036140B v_mad_f32 v27, s6, v7, -v27 ; D282001B 846E0E06 v_mad_f32 v8, v26, v27, v8 ; D2820008 0422371A v_mul_f32_e32 v27, s5, v7 ; 10360E05 v_mad_f32 v27, s11, v9, -v27 ; D282001B 846E120B v_mad_f32 v3, v26, v27, v3 ; D2820003 040E371A v_cos_f32_e32 v1, v1 ; 7E026D01 v_mad_f32 v7, v7, v1, v25 ; D2820007 04660307 v_mad_f32 v8, v9, v1, v8 ; D2820008 04220309 v_mad_f32 v1, v10, v1, v3 ; D2820001 040E030A v_subrev_f32_e32 v2, v2, v7 ; 0A040F02 v_subrev_f32_e32 v3, v14, v8 ; 0A06110E v_subrev_f32_e32 v1, v6, v1 ; 0A020306 v_mad_f32 v0, v22, v2, v0 ; D2820000 04020516 v_mad_f32 v2, v23, v3, v12 ; D2820002 04320717 v_mad_f32 v1, v24, v1, v13 ; D2820001 04360318 v_mad_f32 v3, v16, v4, -1.0 ; D2820003 03CE0910 v_mad_f32 v6, v17, v4, -1.0 ; D2820006 03CE0911 v_mad_f32 v7, v15, v4, -1.0 ; D2820007 03CE090F v_mad_f32 v8, v18, v4, -1.0 ; D2820008 03CE0912 v_mad_f32 v9, v19, v4, -1.0 ; D2820009 03CE0913 v_mad_f32 v10, v20, v4, -1.0 ; D282000A 03CE0914 v_mad_f32 v4, v21, v4, -1.0 ; D2820004 03CE0915 s_buffer_load_dword s5, s[12:15], 0x0 ; C2028D00 s_buffer_load_dword s6, s[12:15], 0x1 ; C2030D01 s_buffer_load_dword s7, s[12:15], 0x2 ; C2038D02 s_buffer_load_dword s8, s[12:15], 0x3 ; C2040D03 s_buffer_load_dword s9, s[12:15], 0x4 ; C2048D04 s_buffer_load_dword s10, s[12:15], 0x5 ; C2050D05 s_buffer_load_dword s11, s[12:15], 0x6 ; C2058D06 s_buffer_load_dword s16, s[12:15], 0x7 ; C2080D07 s_buffer_load_dword s17, s[12:15], 0x8 ; C2088D08 s_buffer_load_dword s18, s[12:15], 0x9 ; C2090D09 s_buffer_load_dword s21, s[12:15], 0xa ; C20A8D0A s_buffer_load_dword s22, s[12:15], 0xb ; C20B0D0B s_buffer_load_dword s23, s[12:15], 0xc ; C20B8D0C s_buffer_load_dword s24, s[12:15], 0xd ; C20C0D0D s_buffer_load_dword s25, s[12:15], 0xe ; C20C8D0E s_buffer_load_dword s26, s[12:15], 0x29 ; C20D0D29 s_buffer_load_dword s27, s[12:15], 0x2a ; C20D8D2A s_buffer_load_dword s28, s[12:15], 0x2c ; C20E0D2C s_buffer_load_dword s29, s[12:15], 0x2d ; C20E8D2D s_buffer_load_dword s30, s[12:15], 0x2e ; C20F0D2E s_buffer_load_dword s31, s[12:15], 0x10 ; C20F8D10 s_buffer_load_dword s32, s[12:15], 0x11 ; C2100D11 s_buffer_load_dword s33, s[12:15], 0x12 ; C2108D12 s_buffer_load_dword s12, s[12:15], 0x13 ; C2060D13 v_mul_f32_e32 v12, v10, v3 ; 1018070A v_mad_f32 v12, v9, v6, -v12 ; D282000C 84320D09 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 v_mad_f32 v6, v10, v7, -v6 ; D2820006 841A0F0A v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mad_f32 v3, v8, v3, -v7 ; D2820003 841E0708 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s28 ; 7E0E021C v_mul_f32_e32 v7, s39, v7 ; 100E0E27 v_mov_b32_e32 v13, s29 ; 7E1A021D v_mul_f32_e32 v13, s39, v13 ; 101A1A27 v_mov_b32_e32 v14, s30 ; 7E1C021E v_mul_f32_e32 v14, s39, v14 ; 101C1C27 v_mul_f32_e32 v12, v12, v4 ; 1018090C v_mul_f32_e32 v6, v6, v4 ; 100C0906 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_mul_f32_e32 v15, v3, v9 ; 101E1303 v_mad_f32 v15, v6, v10, -v15 ; D282000F 843E1506 v_mul_f32_e32 v16, v12, v10 ; 1020150C v_mad_f32 v16, v3, v8, -v16 ; D2820010 84421103 v_mul_f32_e32 v17, v6, v8 ; 10221106 v_mad_f32 v17, v12, v9, -v17 ; D2820011 8446130C v_mul_f32_e32 v15, v15, v4 ; 101E090F v_mul_f32_e32 v16, v16, v4 ; 10200910 v_mul_f32_e32 v4, v17, v4 ; 10080911 v_mov_b32_e32 v17, s20 ; 7E220214 v_mad_f32 v7, v17, s1, v7 ; D2820007 041C0311 v_mov_b32_e32 v17, s20 ; 7E220214 v_mad_f32 v13, v17, s26, v13 ; D282000D 04343511 v_mov_b32_e32 v17, s20 ; 7E220214 v_mad_f32 v14, v17, s27, v14 ; D282000E 04383711 v_mov_b32_e32 v17, s19 ; 7E220213 v_mad_f32 v7, v17, s2, v7 ; D2820007 041C0511 v_mov_b32_e32 v17, s19 ; 7E220213 v_mad_f32 v13, v17, s3, v13 ; D282000D 04340711 v_mov_b32_e32 v17, s19 ; 7E220213 v_mad_f32 v14, v17, s4, v14 ; D282000E 04380911 v_mul_f32_e32 v17, v7, v15 ; 10221F07 v_mul_f32_e32 v18, v7, v12 ; 10241907 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_mad_f32 v17, v16, v13, v17 ; D2820011 04461B10 v_mad_f32 v18, v6, v13, v18 ; D2820012 044A1B06 v_mad_f32 v7, v9, v13, v7 ; D2820007 041E1B09 v_mad_f32 v13, v4, v14, v17 ; D282000D 04461D04 v_mad_f32 v17, v3, v14, v18 ; D2820011 044A1D03 v_mad_f32 v7, v10, v14, v7 ; D2820007 041E1D0A exp 15, 35, 0, 0, 0, v13, v17, v7, v5 ; F800023F 0507110D s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v7, s32 ; 7E0E0220 v_mad_f32 v7, -s12, v2, v7 ; D2820007 241E040C v_mul_f32_e32 v13, s28, v7 ; 101A0E1C v_mul_f32_e32 v14, s29, v7 ; 101C0E1D v_mul_f32_e32 v7, s30, v7 ; 100E0E1E v_mov_b32_e32 v17, s31 ; 7E22021F v_mad_f32 v17, -s12, v0, v17 ; D2820011 2446000C v_mad_f32 v13, s1, v17, v13 ; D282000D 04362201 v_mad_f32 v14, s26, v17, v14 ; D282000E 043A221A v_mad_f32 v7, s27, v17, v7 ; D2820007 041E221B v_mov_b32_e32 v17, s33 ; 7E220221 v_mad_f32 v17, -s12, v1, v17 ; D2820011 2446020C v_mad_f32 v13, s2, v17, v13 ; D282000D 04362202 v_mad_f32 v14, s3, v17, v14 ; D282000E 043A2203 v_mad_f32 v7, s4, v17, v7 ; D2820007 041E2204 v_mul_f32_e32 v15, v13, v15 ; 101E1F0D v_mul_f32_e32 v12, v13, v12 ; 1018190D v_mul_f32_e32 v8, v13, v8 ; 1010110D v_mad_f32 v13, v16, v14, v15 ; D282000D 043E1D10 v_mad_f32 v6, v6, v14, v12 ; D2820006 04321D06 v_mad_f32 v8, v9, v14, v8 ; D2820008 04221D09 v_mad_f32 v4, v4, v7, v13 ; D2820004 04360F04 v_mad_f32 v3, v3, v7, v6 ; D2820003 041A0F03 v_mad_f32 v6, v10, v7, v8 ; D2820006 04220F0A v_mul_f32_e32 v7, s9, v2 ; 100E0409 v_mad_f32 v7, s5, v0, v7 ; D2820007 041E0005 v_mul_f32_e32 v8, s10, v2 ; 1010040A v_mad_f32 v8, s6, v0, v8 ; D2820008 04220006 v_mul_f32_e32 v9, s11, v2 ; 1012040B v_mad_f32 v9, s7, v0, v9 ; D2820009 04260007 v_mul_f32_e32 v10, s16, v2 ; 10140410 v_mad_f32 v10, s8, v0, v10 ; D282000A 042A0008 v_mad_f32 v7, s17, v1, v7 ; D2820007 041E0211 v_mad_f32 v8, s18, v1, v8 ; D2820008 04220212 v_mad_f32 v9, s21, v1, v9 ; D2820009 04260215 exp 15, 36, 0, 0, 0, v5, v5, v5, v5 ; F800024F 05050505 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 exp 15, 37, 0, 0, 0, v4, v3, v6, v5 ; F800025F 05060304 exp 15, 38, 0, 0, 0, v0, v2, v1, v11 ; F800026F 0B010200 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, s22, v1, v10 ; D2820000 042A0216 v_mad_f32 v1, s23, v11, v7 ; D2820001 041E1617 v_mad_f32 v2, s24, v11, v8 ; D2820002 04221618 v_mad_f32 v3, s25, v11, v9 ; D2820003 04261619 v_mad_f32 v0, s0, v11, v0 ; D2820000 04021600 exp 15, 12, 0, 1, 0, v1, v2, v3, v0 ; F80008CF 00030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 32 Code Size: 1608 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..6] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, -0.3333} IMM[1] FLT32 { 0.0000, -0.0000, 8.0000, 0.0398} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: TEX TEMP[3], IN[0], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: DP3 TEMP[1].w, IN[2], IN[2] 18: RSQ TEMP[0], |TEMP[1].wwww| 19: MIN TEMP[2].w, IMM[2].xxxx, TEMP[0] 20: MUL TEMP[3].xyz, TEMP[2].wwww, IN[2] 21: DP3 TEMP[2].w, TEMP[3], -CONST[8] 22: ADD TEMP[2].w, TEMP[2].wwww, -CONST[9].xxxx 23: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[9].yyyy 24: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 25: ADD TEMP[1].w, -TEMP[1].wwww, IMM[0].zzzz 26: MUL TEMP[3], TEMP[2].wwww, TEMP[1].wwww 27: MOV TEMP[5], TEMP[3] 28: KILL_IF TEMP[5] 29: TEX TEMP[5], IN[0], SAMP[4], 2D 30: MOV TEMP[5].zw, IMM[0] 31: MAD TEMP[3].x, TEMP[5].xxxx, CONST[6].yyyy, TEMP[5].wwww 32: CMP TEMP[6], -TEMP[3].wwww, TEMP[3].xxxx, IMM[1].xxxx 33: KILL_IF TEMP[6] 34: UIF CONST[240].xxxx :0 35: RCP TEMP[3].x, IN[4].wwww 36: MUL TEMP[3].xy, TEMP[3].xxxx, IN[4] 37: MAD TEMP[3].xy, TEMP[3], CONST[1], CONST[1].wzzw 38: TEX TEMP[6], TEMP[3], SAMP[0], 2D 39: ELSE :41 40: MOV TEMP[6].xyz, IMM[0].zzzz 41: ENDIF 42: ADD TEMP[3].xyz, TEMP[5].zzzz, -CONST[0] 43: TEX TEMP[5], IN[0], SAMP[2], 2D 44: MUL TEMP[3].xyz, TEMP[3], TEMP[5] 45: MAD TEMP[3].xyz, TEMP[3], CONST[4].wwww, CONST[4] 46: MAX TEMP[4].w, TEMP[1].wwww, IMM[1].xxxx 47: ADD TEMP[1].w, TEMP[4].wwww, IMM[1].yyyy 48: POW TEMP[5].x, |TEMP[4].wwww|, CONST[7].wwww 49: CMP TEMP[1].w, TEMP[1].wwww, IMM[1].xxxx, TEMP[5].xxxx 50: TEX TEMP[5], IN[0], SAMP[3], 2D 51: MAD TEMP[5].xyz, TEMP[5], CONST[5].wwww, CONST[5] 52: DP3_SAT TEMP[4].x, TEMP[4], TEMP[2] 53: ADD TEMP[4].y, TEMP[4].xxxx, IMM[1].yyyy 54: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 55: ADD TEMP[1].y, TEMP[1].xxxx, IMM[1].yyyy 56: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[6].xxxx 57: MOV TEMP[1].z, IMM[1].zzzz 58: ADD TEMP[1].x, TEMP[1].zzzz, CONST[6].xxxx 59: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 60: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww 61: MUL TEMP[2].xyz, TEMP[3], TEMP[4].xxxx 62: CMP TEMP[2].xyz, TEMP[4].yyyy, IMM[1].xxxx, TEMP[2] 63: MUL TEMP[3].xyz, TEMP[5], TEMP[1].xxxx 64: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[1].xxxx, TEMP[3] 65: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 66: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 67: MUL TEMP[1].xyz, TEMP[6], TEMP[1] 68: MUL TEMP[1].xyz, TEMP[1], CONST[7] 69: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 70: CMP OUT[0].xyz, -TEMP[3].wwww, TEMP[1], IMM[1].xxxx 71: MOV OUT[0].w, IMM[1].xxxx 72: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %47 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0 %49 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %52 = bitcast <8 x i32> addrspace(2)* %51 to <32 x i8> addrspace(2)* %53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %55 = bitcast <4 x i32> addrspace(2)* %54 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %86 = fmul float %83, %83 %87 = fmul float %84, %84 %88 = fadd float %87, %86 %89 = fmul float %85, %85 %90 = fadd float %88, %89 %91 = call float @llvm.AMDGPU.rsq.clamped.f32(float %90) %92 = call float @llvm.minnum.f32(float %91, float 0x47EFFFFFE0000000) %93 = fmul float %83, %92 %94 = fmul float %84, %92 %95 = fmul float %85, %92 %96 = fmul float %77, %77 %97 = fmul float %78, %78 %98 = fadd float %97, %96 %99 = fmul float %79, %79 %100 = fadd float %98, %99 %101 = call float @llvm.AMDGPU.rsq.clamped.f32(float %100) %102 = call float @llvm.minnum.f32(float %101, float 0x47EFFFFFE0000000) %103 = fmul float %77, %102 %104 = fmul float %78, %102 %105 = fmul float %79, %102 %106 = bitcast float %75 to i32 %107 = bitcast float %76 to i32 %108 = insertelement <2 x i32> undef, i32 %106, i32 0 %109 = insertelement <2 x i32> %108, i32 %107, i32 1 %110 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %109, <32 x i8> %53, <16 x i8> %56, i32 2) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = fmul float %111, 2.000000e+00 %115 = fadd float %114, -1.000000e+00 %116 = fmul float %112, 2.000000e+00 %117 = fadd float %116, -1.000000e+00 %118 = fmul float %113, 2.000000e+00 %119 = fadd float %118, -1.000000e+00 %120 = fmul float %115, %115 %121 = fmul float %117, %117 %122 = fadd float %121, %120 %123 = fmul float %119, %119 %124 = fadd float %122, %123 %125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124) %126 = call float @llvm.minnum.f32(float %125, float 0x47EFFFFFE0000000) %127 = fmul float %115, %126 %128 = fmul float %117, %126 %129 = fmul float %119, %126 %130 = fmul float %127, %93 %131 = fmul float %128, %94 %132 = fadd float %131, %130 %133 = fmul float %129, %95 %134 = fadd float %132, %133 %135 = fmul float %134, %127 %136 = fmul float %134, %128 %137 = fmul float %134, %129 %138 = fmul float %135, 2.000000e+00 %139 = fsub float %138, %93 %140 = fmul float %136, 2.000000e+00 %141 = fsub float %140, %94 %142 = fmul float %137, 2.000000e+00 %143 = fsub float %142, %95 %144 = fmul float %80, %80 %145 = fmul float %81, %81 %146 = fadd float %145, %144 %147 = fmul float %82, %82 %148 = fadd float %146, %147 %149 = call float @fabs(float %148) %150 = call float @llvm.AMDGPU.rsq.clamped.f32(float %149) %151 = call float @llvm.minnum.f32(float %150, float 0x47EFFFFFE0000000) %152 = fmul float %151, %80 %153 = fmul float %151, %81 %154 = fmul float %151, %82 %155 = fmul float %41, %152 %156 = fsub float -0.000000e+00, %155 %157 = fmul float %42, %153 %158 = fsub float %156, %157 %159 = fmul float %43, %154 %160 = fsub float %158, %159 %161 = fsub float %160, %44 %162 = fmul float %161, %45 %163 = call float @llvm.AMDIL.clamp.(float %162, float 0.000000e+00, float 1.000000e+00) %164 = fmul float %163, %163 %165 = fsub float 1.000000e+00, %148 %166 = fmul float %164, %165 %167 = fmul float %164, %165 %168 = fmul float %164, %165 %169 = fmul float %164, %165 %170 = fcmp olt float %166, 0.000000e+00 %171 = fcmp olt float %167, 0.000000e+00 %172 = fcmp olt float %168, 0.000000e+00 %173 = fcmp olt float %169, 0.000000e+00 %174 = or i1 %173, %172 %175 = or i1 %174, %171 %176 = or i1 %175, %170 %177 = select i1 %176, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %177) %178 = bitcast float %75 to i32 %179 = bitcast float %76 to i32 %180 = insertelement <2 x i32> undef, i32 %178, i32 0 %181 = insertelement <2 x i32> %180, i32 %179, i32 1 %182 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %181, <32 x i8> %71, <16 x i8> %74, i32 2) %183 = extractelement <4 x float> %182, i32 0 %184 = fmul float %183, %36 %185 = fadd float %184, 0xBFD554C980000000 %186 = fsub float -0.000000e+00, %169 %187 = call float @llvm.AMDGPU.cndlt(float %186, float %185, float 0.000000e+00) %188 = fsub float -0.000000e+00, %169 %189 = call float @llvm.AMDGPU.cndlt(float %188, float %185, float 0.000000e+00) %190 = fsub float -0.000000e+00, %169 %191 = call float @llvm.AMDGPU.cndlt(float %190, float %185, float 0.000000e+00) %192 = fsub float -0.000000e+00, %169 %193 = call float @llvm.AMDGPU.cndlt(float %192, float %185, float 0.000000e+00) %194 = fcmp olt float %187, 0.000000e+00 %195 = fcmp olt float %189, 0.000000e+00 %196 = fcmp olt float %191, 0.000000e+00 %197 = fcmp olt float %193, 0.000000e+00 %198 = or i1 %197, %196 %199 = or i1 %198, %195 %200 = or i1 %199, %194 %201 = select i1 %200, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %201) %202 = bitcast float %46 to i32 %203 = icmp eq i32 %202, 0 br i1 %203, label %ENDIF, label %IF IF: ; preds = %main_body %204 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %205 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %206 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %207 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %208 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %209 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %210 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %211 = fdiv float 1.000000e+00, %204 %212 = fmul float %211, %206 %213 = fmul float %211, %205 %214 = fmul float %212, %210 %215 = fadd float %214, %207 %216 = fmul float %213, %209 %217 = fadd float %216, %208 %218 = bitcast float %215 to i32 %219 = bitcast float %217 to i32 %220 = insertelement <2 x i32> undef, i32 %218, i32 0 %221 = insertelement <2 x i32> %220, i32 %219, i32 1 %222 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %221, <32 x i8> %48, <16 x i8> %50, i32 2) %223 = extractelement <4 x float> %222, i32 0 %224 = extractelement <4 x float> %222, i32 1 %225 = extractelement <4 x float> %222, i32 2 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp24.0 = phi float [ %223, %IF ], [ 1.000000e+00, %main_body ] %temp25.0 = phi float [ %224, %IF ], [ 1.000000e+00, %main_body ] %temp26.0 = phi float [ %225, %IF ], [ 1.000000e+00, %main_body ] %226 = fsub float 1.000000e+00, %24 %227 = fsub float 1.000000e+00, %25 %228 = fsub float 1.000000e+00, %26 %229 = bitcast float %75 to i32 %230 = bitcast float %76 to i32 %231 = insertelement <2 x i32> undef, i32 %229, i32 0 %232 = insertelement <2 x i32> %231, i32 %230, i32 1 %233 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %232, <32 x i8> %59, <16 x i8> %62, i32 2) %234 = extractelement <4 x float> %233, i32 0 %235 = extractelement <4 x float> %233, i32 1 %236 = extractelement <4 x float> %233, i32 2 %237 = fmul float %226, %234 %238 = fmul float %227, %235 %239 = fmul float %228, %236 %240 = fmul float %237, %30 %241 = fadd float %240, %27 %242 = fmul float %238, %30 %243 = fadd float %242, %28 %244 = fmul float %239, %30 %245 = fadd float %244, %29 %246 = call float @llvm.maxnum.f32(float %165, float 0.000000e+00) %247 = fadd float %246, 0xBEB0C6F7A0000000 %248 = call float @fabs(float %246) %249 = call float @llvm.pow.f32(float %248, float %40) %250 = call float @llvm.AMDGPU.cndlt(float %247, float 0.000000e+00, float %249) %251 = bitcast float %75 to i32 %252 = bitcast float %76 to i32 %253 = insertelement <2 x i32> undef, i32 %251, i32 0 %254 = insertelement <2 x i32> %253, i32 %252, i32 1 %255 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %254, <32 x i8> %65, <16 x i8> %68, i32 2) %256 = extractelement <4 x float> %255, i32 0 %257 = extractelement <4 x float> %255, i32 1 %258 = extractelement <4 x float> %255, i32 2 %259 = fmul float %256, %34 %260 = fadd float %259, %31 %261 = fmul float %257, %34 %262 = fadd float %261, %32 %263 = fmul float %258, %34 %264 = fadd float %263, %33 %265 = fmul float %127, %103 %266 = fmul float %128, %104 %267 = fadd float %266, %265 %268 = fmul float %129, %105 %269 = fadd float %267, %268 %270 = call float @llvm.AMDIL.clamp.(float %269, float 0.000000e+00, float 1.000000e+00) %271 = fadd float %270, 0xBEB0C6F7A0000000 %272 = fmul float %139, %103 %273 = fmul float %141, %104 %274 = fadd float %273, %272 %275 = fmul float %143, %105 %276 = fadd float %274, %275 %277 = call float @llvm.AMDIL.clamp.(float %276, float 0.000000e+00, float 1.000000e+00) %278 = fadd float %277, 0xBEB0C6F7A0000000 %279 = call float @fabs(float %277) %280 = call float @llvm.pow.f32(float %279, float %35) %281 = fadd float %35, 8.000000e+00 %282 = fmul float %281, %280 %283 = fmul float %282, 0x3FA45F3060000000 %284 = fmul float %241, %270 %285 = fmul float %243, %270 %286 = fmul float %245, %270 %287 = call float @llvm.AMDGPU.cndlt(float %271, float 0.000000e+00, float %284) %288 = call float @llvm.AMDGPU.cndlt(float %271, float 0.000000e+00, float %285) %289 = call float @llvm.AMDGPU.cndlt(float %271, float 0.000000e+00, float %286) %290 = fmul float %260, %283 %291 = fmul float %262, %283 %292 = fmul float %264, %283 %293 = call float @llvm.AMDGPU.cndlt(float %278, float 0.000000e+00, float %290) %294 = call float @llvm.AMDGPU.cndlt(float %278, float 0.000000e+00, float %291) %295 = call float @llvm.AMDGPU.cndlt(float %278, float 0.000000e+00, float %292) %296 = fadd float %293, %287 %297 = fadd float %294, %288 %298 = fadd float %295, %289 %299 = fmul float %250, %296 %300 = fmul float %250, %297 %301 = fmul float %250, %298 %302 = fmul float %temp24.0, %299 %303 = fmul float %temp25.0, %300 %304 = fmul float %temp26.0, %301 %305 = fmul float %302, %37 %306 = fmul float %303, %38 %307 = fmul float %304, %39 %308 = fmul float %164, %305 %309 = fmul float %164, %306 %310 = fmul float %164, %307 %311 = fsub float -0.000000e+00, %169 %312 = call float @llvm.AMDGPU.cndlt(float %311, float %308, float 0.000000e+00) %313 = fsub float -0.000000e+00, %169 %314 = call float @llvm.AMDGPU.cndlt(float %313, float %309, float 0.000000e+00) %315 = fsub float -0.000000e+00, %169 %316 = call float @llvm.AMDGPU.cndlt(float %315, float %310, float 0.000000e+00) %317 = call i32 @llvm.SI.packf16(float %312, float %314) %318 = bitcast i32 %317 to float %319 = call i32 @llvm.SI.packf16(float %316, float 0.000000e+00) %320 = bitcast i32 %319 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %318, float %320, float %318, float %320) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_movk_i32 s8, 0xf00 ; B0080F00 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s22, s[0:3], s8 ; C20B0008 s_buffer_load_dword s23, s[0:3], 0x17 ; C20B8117 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600 v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00 v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01 v_interp_p1_f32 v5, v0, 0, 3, [m0] ; C8140C00 v_interp_p2_f32 v5, [v5], v1, 0, 3, [m0] ; C8150C01 v_interp_p1_f32 v6, v0, 1, 3, [m0] ; C8180D00 v_interp_p2_f32 v6, [v6], v1, 1, 3, [m0] ; C8190D01 v_interp_p1_f32 v8, v0, 2, 3, [m0] ; C8200E00 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p2_f32 v8, [v8], v1, 2, 3, [m0] ; C8210E01 v_mul_f32_e32 v13, v7, v7 ; 101A0F07 v_mad_f32 v13, v9, v9, v13 ; D282000D 04361309 s_buffer_load_dword s10, s[0:3], 0x20 ; C2050120 v_mad_f32 v13, v12, v12, v13 ; D282000D 0436190C s_buffer_load_dword s11, s[0:3], 0x21 ; C2058121 v_rsq_clamp_f32_e64 v14, |v13| ; D358010E 0000010D s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800700 00660F02 s_buffer_load_dword s12, s[0:3], 0x22 ; C2060122 v_min_f32_e32 v14, 0x7f7fffff, v14 ; 1E1C1CFF 7F7FFFFF v_mul_f32_e32 v7, v7, v14 ; 100E1D07 v_mul_f32_e32 v7, s10, v7 ; 100E0E0A v_mul_f32_e32 v9, v9, v14 ; 10121D09 v_mad_f32 v7, -s11, v9, -v7 ; D2820007 A41E120B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, 2.0, v15, -1.0 ; D2820012 03CE1EF4 v_mad_f32 v19, 2.0, v16, -1.0 ; D2820013 03CE20F4 v_mad_f32 v20, 2.0, v17, -1.0 ; D2820014 03CE22F4 v_mul_f32_e32 v9, v12, v14 ; 10121D0C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, -s12, v9, v7 ; D2820007 241E120C v_subrev_f32_e32 v7, s8, v7 ; 0A0E0E08 v_mul_f32_e32 v7, s9, v7 ; 100E0E09 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_sub_f32_e32 v12, 1.0, v13 ; 08181AF2 v_mul_f32_e32 v9, v12, v7 ; 10120F0C v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v13, 0, -1.0, vcc ; D200000D 01A9E680 v_cndmask_b32_e64 v13, v13, -1.0, vcc ; D200000D 01A9E70D v_cndmask_b32_e64 v13, v13, -1.0, vcc ; D200000D 01A9E70D v_cndmask_b32_e64 v14, v13, -1.0, vcc ; D200000E 01A9E70D v_mul_f32_e32 v13, v5, v5 ; 101A0B05 v_mad_f32 v13, v6, v6, v13 ; D282000D 04360D06 v_mad_f32 v13, v8, v8, v13 ; D282000D 04361108 v_rsq_clamp_f32_e32 v21, v13 ; 7E2A590D v_mul_f32_e32 v13, v4, v4 ; 101A0904 v_mad_f32 v13, v10, v10, v13 ; D282000D 0436150A v_mad_f32 v13, v11, v11, v13 ; D282000D 0436170B v_rsq_clamp_f32_e32 v22, v13 ; 7E2C590D v_mul_f32_e32 v13, v18, v18 ; 101A2512 s_buffer_load_dword s10, s[0:3], 0x19 ; C2050119 v_mad_f32 v13, v19, v19, v13 ; D282000D 04362713 v_mad_f32 v13, v20, v20, v13 ; D282000D 04362914 v_rsq_clamp_f32_e32 v23, v13 ; 7E2E590D v_mov_b32_e32 v13, 1.0 ; 7E1A02F2 v_cmpx_le_f32_e32 vcc, 0, v14 ; 7C261C80 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 s_buffer_load_dword s9, s[0:3], 0x1c ; C204811C s_buffer_load_dword s8, s[0:3], 0x1d ; C204011D image_sample v14, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[16:19] ; F0800100 00880E02 v_mov_b32_e32 v15, 0xbeaaa64c ; 7E1E02FF BEAAA64C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v14, s10, v14, v15 ; D282000E 043E1C0A v_xor_b32_e32 v15, 0x80000000, v9 ; 3A1E12FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v15 ; 7C081E80 v_cndmask_b32_e64 v14, 0, v14, vcc ; D200000E 01AA1C80 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s10, s[0:3], 0x1e ; C205011E s_buffer_load_dword s12, s[0:3], 0x1f ; C206011F s_buffer_load_dword s20, s[0:3], 0x0 ; C20A0100 s_buffer_load_dword s21, s[0:3], 0x1 ; C20A8101 s_buffer_load_dword s19, s[0:3], 0x2 ; C2098102 s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111 s_buffer_load_dword s15, s[0:3], 0x12 ; C2078112 s_buffer_load_dword s24, s[0:3], 0x13 ; C20C0113 s_buffer_load_dword s18, s[0:3], 0x14 ; C2090114 s_buffer_load_dword s17, s[0:3], 0x15 ; C2088115 s_buffer_load_dword s16, s[0:3], 0x16 ; C2080116 v_cmp_gt_f32_e32 vcc, 0, v14 ; 7C081C80 v_cndmask_b32_e64 v14, 0, -1.0, vcc ; D200000E 01A9E680 v_cndmask_b32_e64 v14, v14, -1.0, vcc ; D200000E 01A9E70E v_cndmask_b32_e64 v14, v14, -1.0, vcc ; D200000E 01A9E70E v_cndmask_b32_e64 v14, v14, -1.0, vcc ; D200000E 01A9E70E v_cmpx_le_f32_e32 vcc, 0, v14 ; 7C261C80 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v14, 1.0 ; 7E1C02F2 v_mov_b32_e32 v15, 1.0 ; 7E1E02F2 v_cmp_ne_i32_e64 s[26:27], 0, s22 ; D10A001A 00002C80 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v16, s24 ; 7E200218 v_mov_b32_e32 v17, s23 ; 7E220217 s_and_saveexec_b64 s[22:23], s[26:27] ; BE96241A s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 v_interp_p1_f32 v13, v0, 3, 4, [m0] ; C8341300 s_buffer_load_dword s36, s[0:3], 0x6 ; C2120106 s_buffer_load_dword s37, s[0:3], 0x7 ; C2128107 s_buffer_load_dword s38, s[0:3], 0x4 ; C2130104 s_buffer_load_dword s39, s[0:3], 0x5 ; C2138105 v_interp_p2_f32 v13, [v13], v1, 3, 4, [m0] ; C8351301 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_rcp_f32_e32 v13, v13 ; 7E1A550D v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v0, v0, 0, 4, [m0] ; C8001000 v_interp_p2_f32 v0, [v0], v1, 0, 4, [m0] ; C8011001 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mul_f32_e32 v1, v14, v13 ; 10021B0E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s37 ; 7E1A0225 v_mad_f32 v13, s38, v0, v13 ; D282000D 04360026 v_mov_b32_e32 v0, s36 ; 7E000224 v_mad_f32 v14, s39, v1, v0 ; D282000E 04020227 image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[24:31], s[32:35] ; F0800700 01060D0D s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[22:23] ; 88FE167E v_min_f32_e32 v0, 0x7f7fffff, v21 ; 1E002AFF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v22 ; 1E022CFF 7F7FFFFF v_mul_f32_e32 v4, v1, v4 ; 10080901 v_mul_f32_e32 v10, v1, v10 ; 10141501 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_min_f32_e32 v11, 0x7f7fffff, v23 ; 1E162EFF 7F7FFFFF v_mul_f32_e32 v18, v11, v18 ; 1024250B v_mul_f32_e32 v19, v11, v19 ; 1026270B v_mul_f32_e32 v11, v11, v20 ; 1016290B v_sub_f32_e64 v20, 1.0, s20 ; D2080014 000028F2 v_sub_f32_e64 v21, 1.0, s21 ; D2080015 00002AF2 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 v_sub_f32_e64 v22, 1.0, s19 ; D2080016 000026F2 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[0:3] ; F0800700 00061702 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v20, v23, v20 ; 10282917 v_mul_f32_e32 v21, v24, v21 ; 102A2B18 v_mul_f32_e32 v22, v25, v22 ; 102C2D19 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[20:23] ; F0800700 00A81702 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v17, v23, s18 ; D2820002 004A2F11 v_mad_f32 v3, v24, v17, s17 ; D2820003 00462318 v_mad_f32 v17, v25, v17, s16 ; D2820011 00422319 v_mul_f32_e32 v23, v0, v5 ; 102E0B00 v_mul_f32_e32 v23, v23, v18 ; 102E2517 v_mul_f32_e32 v24, v0, v6 ; 10300D00 v_mad_f32 v23, v19, v24, v23 ; D2820017 045E3113 v_mul_f32_e32 v24, v0, v8 ; 10301100 v_mad_f32 v23, v11, v24, v23 ; D2820017 045E310B v_mul_f32_e32 v24, v18, v23 ; 10302F12 v_mad_f32 v24, v23, v18, v24 ; D2820018 04622517 v_mad_f32 v5, -v5, v0, v24 ; D2820005 24620105 v_mul_f32_e32 v24, v19, v23 ; 10302F13 v_mad_f32 v24, v23, v19, v24 ; D2820018 04622717 v_mad_f32 v6, -v6, v0, v24 ; D2820006 24620106 v_mul_f32_e32 v24, v11, v23 ; 10302F0B v_mad_f32 v23, v23, v11, v24 ; D2820017 04621717 v_mad_f32 v0, -v8, v0, v23 ; D2820000 245E0108 v_mad_f32 v8, v16, v20, s13 ; D2820008 00362910 v_mad_f32 v20, v21, v16, s14 ; D2820014 003A2115 v_mad_f32 v16, v22, v16, s15 ; D2820010 003E2116 v_max_f32_e32 v12, 0, v12 ; 20181880 v_mov_b32_e32 v21, 0xb58637bd ; 7E2A02FF B58637BD v_add_f32_e32 v22, v21, v12 ; 062C1915 v_mov_b32_e32 v23, 0x7fffffff ; 7E2E02FF 7FFFFFFF v_and_b32_e32 v12, v12, v23 ; 36182F0C v_log_f32_e32 v12, v12 ; 7E184F0C v_cmp_gt_f32_e32 vcc, 0, v22 ; 7C082C80 v_mul_f32_e32 v18, v4, v18 ; 10242504 v_mad_f32 v18, v19, v10, v18 ; D2820012 044A1513 v_mul_legacy_f32_e32 v12, s12, v12 ; 0E18180C v_exp_f32_e32 v12, v12 ; 7E184B0C v_cndmask_b32_e64 v12, v12, 0, vcc ; D200000C 01A9010C v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mad_f32 v4, v6, v10, v4 ; D2820004 04121506 v_mad_f32 v5, v11, v1, v18 ; D2820005 044A030B v_mad_f32 v0, v0, v1, v4 ; D2820000 04120300 v_add_f32_e64 v1, 0, v5 clamp ; D2060801 00020A80 v_mul_f32_e32 v4, v1, v8 ; 10081101 v_mul_f32_e32 v5, v1, v20 ; 100A2901 v_mul_f32_e32 v6, v1, v16 ; 100C2101 v_add_f32_e32 v1, v21, v1 ; 06020315 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v8, v0, v23 ; 36102F00 v_log_f32_e32 v8, v8 ; 7E104F08 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v4, 0, vcc ; D2000001 01A90104 v_cndmask_b32_e64 v4, v5, 0, vcc ; D2000004 01A90105 v_cndmask_b32_e64 v5, v6, 0, vcc ; D2000005 01A90106 v_add_f32_e32 v0, v21, v0 ; 06000115 v_mul_legacy_f32_e32 v6, s11, v8 ; 0E0C100B v_mov_b32_e32 v8, 0x41000000 ; 7E1002FF 41000000 v_add_f32_e32 v8, s11, v8 ; 0610100B v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v6, v6, v8 ; 100C1106 v_mul_f32_e32 v6, 0x3d22f983, v6 ; 100C0CFF 3D22F983 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v6, v6, v17 ; 100C2306 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v2, 0, vcc ; D2000000 01A90102 v_cndmask_b32_e64 v2, v3, 0, vcc ; D2000002 01A90103 v_cndmask_b32_e64 v3, v6, 0, vcc ; D2000003 01A90106 v_add_f32_e32 v0, v1, v0 ; 06000101 v_add_f32_e32 v1, v4, v2 ; 06020504 v_add_f32_e32 v2, v5, v3 ; 06040705 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_mul_f32_e32 v1, v1, v12 ; 10021901 v_mul_f32_e32 v2, v2, v12 ; 10041902 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mul_f32_e32 v1, v1, v14 ; 10021D01 v_mul_f32_e32 v2, v2, v15 ; 10041F02 v_mul_f32_e32 v0, s9, v0 ; 10000009 v_mul_f32_e32 v1, s8, v1 ; 10020208 v_mul_f32_e32 v2, s10, v2 ; 1004040A v_mul_f32_e32 v0, v0, v7 ; 10000F00 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mul_f32_e32 v2, v2, v7 ; 10040F02 v_xor_b32_e32 v3, 0x80000000, v9 ; 3A0612FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cndmask_b32_e64 v1, 0, v2, vcc ; D2000001 01AA0480 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 28 Code Size: 1360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..6] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { -0.0000, 8.0000, 0.0398, 0.0000} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: TEX TEMP[3], IN[0], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: DP3 TEMP[1].w, IN[2], IN[2] 18: RSQ TEMP[0], |TEMP[1].wwww| 19: MIN TEMP[2].w, IMM[2].xxxx, TEMP[0] 20: MUL TEMP[3].xyz, TEMP[2].wwww, IN[2] 21: DP3 TEMP[2].w, TEMP[3], -CONST[8] 22: ADD TEMP[2].w, TEMP[2].wwww, -CONST[9].xxxx 23: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[9].yyyy 24: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 25: ADD TEMP[1].w, -TEMP[1].wwww, IMM[0].zzzz 26: MUL TEMP[3], TEMP[2].wwww, TEMP[1].wwww 27: MOV TEMP[5], TEMP[3] 28: KILL_IF TEMP[5] 29: UIF CONST[240].xxxx :0 30: RCP TEMP[3].x, IN[4].wwww 31: MUL TEMP[3].xy, TEMP[3].xxxx, IN[4] 32: MAD TEMP[3].xy, TEMP[3], CONST[1], CONST[1].wzzw 33: TEX TEMP[5], TEMP[3], SAMP[0], 2D 34: ELSE :36 35: MOV TEMP[5].xyz, IMM[0].zzzz 36: ENDIF 37: MOV TEMP[3].z, IMM[0].zzzz 38: ADD TEMP[3].xyz, TEMP[3].zzzz, -CONST[0] 39: TEX TEMP[6], IN[0], SAMP[2], 2D 40: MUL TEMP[3].xyz, TEMP[3], TEMP[6] 41: MAD TEMP[3].xyz, TEMP[3], CONST[4].wwww, CONST[4] 42: MAX TEMP[4].w, TEMP[1].wwww, IMM[0].wwww 43: ADD TEMP[1].w, TEMP[4].wwww, IMM[1].xxxx 44: POW TEMP[5].w, |TEMP[4].wwww|, CONST[7].wwww 45: CMP TEMP[1].w, TEMP[1].wwww, IMM[0].wwww, TEMP[5].wwww 46: TEX TEMP[6], IN[0], SAMP[3], 2D 47: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 48: DP3_SAT TEMP[4].x, TEMP[4], TEMP[2] 49: ADD TEMP[4].y, TEMP[4].xxxx, IMM[1].xxxx 50: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 51: ADD TEMP[1].y, TEMP[1].xxxx, IMM[1].xxxx 52: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[6].xxxx 53: MOV TEMP[2].y, IMM[1].yyyy 54: ADD TEMP[1].x, TEMP[2].yyyy, CONST[6].xxxx 55: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 56: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 57: MUL TEMP[2].xyz, TEMP[3], TEMP[4].xxxx 58: CMP TEMP[2].xyz, TEMP[4].yyyy, IMM[0].wwww, TEMP[2] 59: MUL TEMP[3].xyz, TEMP[6], TEMP[1].xxxx 60: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[0].wwww, TEMP[3] 61: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 62: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 63: MUL TEMP[1].xyz, TEMP[5], TEMP[1] 64: MUL TEMP[1].xyz, TEMP[1], CONST[7] 65: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 66: CMP OUT[0].xyz, -TEMP[3].wwww, TEMP[1], IMM[0].wwww 67: MOV OUT[0].w, IMM[0].wwww 68: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %46 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %47 = load <32 x i8>, <32 x i8> addrspace(2)* %46, align 32, !tbaa !0 %48 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %51 = bitcast <8 x i32> addrspace(2)* %50 to <32 x i8> addrspace(2)* %52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %54 = bitcast <4 x i32> addrspace(2)* %53 to <16 x i8> addrspace(2)* %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %57 = bitcast <8 x i32> addrspace(2)* %56 to <32 x i8> addrspace(2)* %58 = load <32 x i8>, <32 x i8> addrspace(2)* %57, align 32, !tbaa !0 %59 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %60 = bitcast <4 x i32> addrspace(2)* %59 to <16 x i8> addrspace(2)* %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)* %64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)* %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %79 = fmul float %76, %76 %80 = fmul float %77, %77 %81 = fadd float %80, %79 %82 = fmul float %78, %78 %83 = fadd float %81, %82 %84 = call float @llvm.AMDGPU.rsq.clamped.f32(float %83) %85 = call float @llvm.minnum.f32(float %84, float 0x47EFFFFFE0000000) %86 = fmul float %76, %85 %87 = fmul float %77, %85 %88 = fmul float %78, %85 %89 = fmul float %70, %70 %90 = fmul float %71, %71 %91 = fadd float %90, %89 %92 = fmul float %72, %72 %93 = fadd float %91, %92 %94 = call float @llvm.AMDGPU.rsq.clamped.f32(float %93) %95 = call float @llvm.minnum.f32(float %94, float 0x47EFFFFFE0000000) %96 = fmul float %70, %95 %97 = fmul float %71, %95 %98 = fmul float %72, %95 %99 = bitcast float %68 to i32 %100 = bitcast float %69 to i32 %101 = insertelement <2 x i32> undef, i32 %99, i32 0 %102 = insertelement <2 x i32> %101, i32 %100, i32 1 %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %52, <16 x i8> %55, i32 2) %104 = extractelement <4 x float> %103, i32 0 %105 = extractelement <4 x float> %103, i32 1 %106 = extractelement <4 x float> %103, i32 2 %107 = fmul float %104, 2.000000e+00 %108 = fadd float %107, -1.000000e+00 %109 = fmul float %105, 2.000000e+00 %110 = fadd float %109, -1.000000e+00 %111 = fmul float %106, 2.000000e+00 %112 = fadd float %111, -1.000000e+00 %113 = fmul float %108, %108 %114 = fmul float %110, %110 %115 = fadd float %114, %113 %116 = fmul float %112, %112 %117 = fadd float %115, %116 %118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117) %119 = call float @llvm.minnum.f32(float %118, float 0x47EFFFFFE0000000) %120 = fmul float %108, %119 %121 = fmul float %110, %119 %122 = fmul float %112, %119 %123 = fmul float %120, %86 %124 = fmul float %121, %87 %125 = fadd float %124, %123 %126 = fmul float %122, %88 %127 = fadd float %125, %126 %128 = fmul float %127, %120 %129 = fmul float %127, %121 %130 = fmul float %127, %122 %131 = fmul float %128, 2.000000e+00 %132 = fsub float %131, %86 %133 = fmul float %129, 2.000000e+00 %134 = fsub float %133, %87 %135 = fmul float %130, 2.000000e+00 %136 = fsub float %135, %88 %137 = fmul float %73, %73 %138 = fmul float %74, %74 %139 = fadd float %138, %137 %140 = fmul float %75, %75 %141 = fadd float %139, %140 %142 = call float @fabs(float %141) %143 = call float @llvm.AMDGPU.rsq.clamped.f32(float %142) %144 = call float @llvm.minnum.f32(float %143, float 0x47EFFFFFE0000000) %145 = fmul float %144, %73 %146 = fmul float %144, %74 %147 = fmul float %144, %75 %148 = fmul float %40, %145 %149 = fsub float -0.000000e+00, %148 %150 = fmul float %41, %146 %151 = fsub float %149, %150 %152 = fmul float %42, %147 %153 = fsub float %151, %152 %154 = fsub float %153, %43 %155 = fmul float %154, %44 %156 = call float @llvm.AMDIL.clamp.(float %155, float 0.000000e+00, float 1.000000e+00) %157 = fmul float %156, %156 %158 = fsub float 1.000000e+00, %141 %159 = fmul float %157, %158 %160 = fmul float %157, %158 %161 = fmul float %157, %158 %162 = fmul float %157, %158 %163 = fcmp olt float %159, 0.000000e+00 %164 = fcmp olt float %160, 0.000000e+00 %165 = fcmp olt float %161, 0.000000e+00 %166 = fcmp olt float %162, 0.000000e+00 %167 = or i1 %166, %165 %168 = or i1 %167, %164 %169 = or i1 %168, %163 %170 = select i1 %169, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %170) %171 = bitcast float %45 to i32 %172 = icmp eq i32 %171, 0 br i1 %172, label %ENDIF, label %IF IF: ; preds = %main_body %173 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %174 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %175 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %176 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %177 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %178 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %179 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %180 = fdiv float 1.000000e+00, %173 %181 = fmul float %180, %175 %182 = fmul float %180, %174 %183 = fmul float %181, %179 %184 = fadd float %183, %176 %185 = fmul float %182, %178 %186 = fadd float %185, %177 %187 = bitcast float %184 to i32 %188 = bitcast float %186 to i32 %189 = insertelement <2 x i32> undef, i32 %187, i32 0 %190 = insertelement <2 x i32> %189, i32 %188, i32 1 %191 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %190, <32 x i8> %47, <16 x i8> %49, i32 2) %192 = extractelement <4 x float> %191, i32 0 %193 = extractelement <4 x float> %191, i32 1 %194 = extractelement <4 x float> %191, i32 2 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp20.0 = phi float [ %192, %IF ], [ 1.000000e+00, %main_body ] %temp21.0 = phi float [ %193, %IF ], [ 1.000000e+00, %main_body ] %temp22.0 = phi float [ %194, %IF ], [ 1.000000e+00, %main_body ] %195 = fsub float 1.000000e+00, %24 %196 = fsub float 1.000000e+00, %25 %197 = fsub float 1.000000e+00, %26 %198 = bitcast float %68 to i32 %199 = bitcast float %69 to i32 %200 = insertelement <2 x i32> undef, i32 %198, i32 0 %201 = insertelement <2 x i32> %200, i32 %199, i32 1 %202 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %201, <32 x i8> %58, <16 x i8> %61, i32 2) %203 = extractelement <4 x float> %202, i32 0 %204 = extractelement <4 x float> %202, i32 1 %205 = extractelement <4 x float> %202, i32 2 %206 = fmul float %195, %203 %207 = fmul float %196, %204 %208 = fmul float %197, %205 %209 = fmul float %206, %30 %210 = fadd float %209, %27 %211 = fmul float %207, %30 %212 = fadd float %211, %28 %213 = fmul float %208, %30 %214 = fadd float %213, %29 %215 = call float @llvm.maxnum.f32(float %158, float 0.000000e+00) %216 = fadd float %215, 0xBEB0C6F7A0000000 %217 = call float @fabs(float %215) %218 = call float @llvm.pow.f32(float %217, float %39) %219 = call float @llvm.AMDGPU.cndlt(float %216, float 0.000000e+00, float %218) %220 = bitcast float %68 to i32 %221 = bitcast float %69 to i32 %222 = insertelement <2 x i32> undef, i32 %220, i32 0 %223 = insertelement <2 x i32> %222, i32 %221, i32 1 %224 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %223, <32 x i8> %64, <16 x i8> %67, i32 2) %225 = extractelement <4 x float> %224, i32 0 %226 = extractelement <4 x float> %224, i32 1 %227 = extractelement <4 x float> %224, i32 2 %228 = fmul float %225, %34 %229 = fadd float %228, %31 %230 = fmul float %226, %34 %231 = fadd float %230, %32 %232 = fmul float %227, %34 %233 = fadd float %232, %33 %234 = fmul float %120, %96 %235 = fmul float %121, %97 %236 = fadd float %235, %234 %237 = fmul float %122, %98 %238 = fadd float %236, %237 %239 = call float @llvm.AMDIL.clamp.(float %238, float 0.000000e+00, float 1.000000e+00) %240 = fadd float %239, 0xBEB0C6F7A0000000 %241 = fmul float %132, %96 %242 = fmul float %134, %97 %243 = fadd float %242, %241 %244 = fmul float %136, %98 %245 = fadd float %243, %244 %246 = call float @llvm.AMDIL.clamp.(float %245, float 0.000000e+00, float 1.000000e+00) %247 = fadd float %246, 0xBEB0C6F7A0000000 %248 = call float @fabs(float %246) %249 = call float @llvm.pow.f32(float %248, float %35) %250 = fadd float %35, 8.000000e+00 %251 = fmul float %250, %249 %252 = fmul float %251, 0x3FA45F3060000000 %253 = fmul float %210, %239 %254 = fmul float %212, %239 %255 = fmul float %214, %239 %256 = call float @llvm.AMDGPU.cndlt(float %240, float 0.000000e+00, float %253) %257 = call float @llvm.AMDGPU.cndlt(float %240, float 0.000000e+00, float %254) %258 = call float @llvm.AMDGPU.cndlt(float %240, float 0.000000e+00, float %255) %259 = fmul float %229, %252 %260 = fmul float %231, %252 %261 = fmul float %233, %252 %262 = call float @llvm.AMDGPU.cndlt(float %247, float 0.000000e+00, float %259) %263 = call float @llvm.AMDGPU.cndlt(float %247, float 0.000000e+00, float %260) %264 = call float @llvm.AMDGPU.cndlt(float %247, float 0.000000e+00, float %261) %265 = fadd float %262, %256 %266 = fadd float %263, %257 %267 = fadd float %264, %258 %268 = fmul float %219, %265 %269 = fmul float %219, %266 %270 = fmul float %219, %267 %271 = fmul float %temp20.0, %268 %272 = fmul float %temp21.0, %269 %273 = fmul float %temp22.0, %270 %274 = fmul float %271, %36 %275 = fmul float %272, %37 %276 = fmul float %273, %38 %277 = fmul float %157, %274 %278 = fmul float %157, %275 %279 = fmul float %157, %276 %280 = fsub float -0.000000e+00, %162 %281 = call float @llvm.AMDGPU.cndlt(float %280, float %277, float 0.000000e+00) %282 = fsub float -0.000000e+00, %162 %283 = call float @llvm.AMDGPU.cndlt(float %282, float %278, float 0.000000e+00) %284 = fsub float -0.000000e+00, %162 %285 = call float @llvm.AMDGPU.cndlt(float %284, float %279, float 0.000000e+00) %286 = call i32 @llvm.SI.packf16(float %281, float %283) %287 = bitcast i32 %286 to float %288 = call i32 @llvm.SI.packf16(float %285, float 0.000000e+00) %289 = bitcast i32 %288 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %287, float %289, float %287, float %289) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v12, v0, 2, 1, [m0] ; C8300600 v_interp_p2_f32 v12, [v12], v1, 2, 1, [m0] ; C8310601 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_interp_p1_f32 v4, v0, 0, 3, [m0] ; C8100C00 s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 v_interp_p2_f32 v4, [v4], v1, 0, 3, [m0] ; C8110C01 v_interp_p1_f32 v5, v0, 1, 3, [m0] ; C8140D00 v_interp_p2_f32 v5, [v5], v1, 1, 3, [m0] ; C8150D01 v_interp_p1_f32 v7, v0, 2, 3, [m0] ; C81C0E00 v_interp_p2_f32 v7, [v7], v1, 2, 3, [m0] ; C81D0E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800700 00440D02 v_mul_f32_e32 v16, v6, v6 ; 10200D06 v_mad_f32 v16, v8, v8, v16 ; D2820010 04421108 v_mad_f32 v16, v11, v11, v16 ; D2820010 0442170B s_buffer_load_dword s0, s[12:15], 0x20 ; C2000D20 s_buffer_load_dword s1, s[12:15], 0x21 ; C2008D21 s_buffer_load_dword s2, s[12:15], 0x22 ; C2010D22 s_buffer_load_dword s3, s[12:15], 0x24 ; C2018D24 s_buffer_load_dword s8, s[12:15], 0x25 ; C2040D25 v_rsq_clamp_f32_e64 v17, |v16| ; D3580111 00000110 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, 2.0, v13, -1.0 ; D2820012 03CE1AF4 v_mad_f32 v19, 2.0, v14, -1.0 ; D2820013 03CE1CF4 v_mad_f32 v20, 2.0, v15, -1.0 ; D2820014 03CE1EF4 v_min_f32_e32 v13, 0x7f7fffff, v17 ; 1E1A22FF 7F7FFFFF v_mul_f32_e32 v6, v6, v13 ; 100C1B06 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s0, v6 ; 100C0C00 v_mul_f32_e32 v8, v8, v13 ; 10101B08 v_mad_f32 v6, -s1, v8, -v6 ; D2820006 A41A1001 v_mul_f32_e32 v8, v11, v13 ; 10101B0B v_mad_f32 v6, -s2, v8, v6 ; D2820006 241A1002 v_subrev_f32_e32 v6, s3, v6 ; 0A0C0C03 v_mul_f32_e32 v6, s8, v6 ; 100C0C08 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_sub_f32_e32 v11, 1.0, v16 ; 081620F2 v_mul_f32_e32 v8, v11, v6 ; 10100D0B v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_cndmask_b32_e64 v13, 0, -1.0, vcc ; D200000D 01A9E680 v_cndmask_b32_e64 v13, v13, -1.0, vcc ; D200000D 01A9E70D v_cndmask_b32_e64 v13, v13, -1.0, vcc ; D200000D 01A9E70D v_cndmask_b32_e64 v14, v13, -1.0, vcc ; D200000E 01A9E70D s_movk_i32 s0, 0xf00 ; B0000F00 s_buffer_load_dword s22, s[12:15], s0 ; C20B0C00 v_mul_f32_e32 v13, v4, v4 ; 101A0904 v_mad_f32 v13, v5, v5, v13 ; D282000D 04360B05 v_mad_f32 v13, v7, v7, v13 ; D282000D 04360F07 v_rsq_clamp_f32_e32 v21, v13 ; 7E2A590D v_mul_f32_e32 v13, v9, v9 ; 101A1309 v_mad_f32 v13, v10, v10, v13 ; D282000D 0436150A v_mad_f32 v13, v12, v12, v13 ; D282000D 0436190C v_rsq_clamp_f32_e32 v22, v13 ; 7E2C590D s_buffer_load_dword s8, s[12:15], 0x1f ; C2040D1F s_buffer_load_dword s20, s[12:15], 0x0 ; C20A0D00 s_buffer_load_dword s21, s[12:15], 0x1 ; C20A8D01 s_buffer_load_dword s19, s[12:15], 0x2 ; C2098D02 s_buffer_load_dword s9, s[12:15], 0x10 ; C2048D10 s_buffer_load_dword s10, s[12:15], 0x11 ; C2050D11 s_buffer_load_dword s11, s[12:15], 0x12 ; C2058D12 s_buffer_load_dword s23, s[12:15], 0x13 ; C20B8D13 s_buffer_load_dword s18, s[12:15], 0x14 ; C2090D14 s_buffer_load_dword s17, s[12:15], 0x15 ; C2088D15 s_buffer_load_dword s16, s[12:15], 0x16 ; C2080D16 s_buffer_load_dword s24, s[12:15], 0x17 ; C20C0D17 s_buffer_load_dword s3, s[12:15], 0x18 ; C2018D18 s_buffer_load_dword s2, s[12:15], 0x1c ; C2010D1C s_buffer_load_dword s1, s[12:15], 0x1d ; C2008D1D s_buffer_load_dword s0, s[12:15], 0x1e ; C2000D1E v_mul_f32_e32 v13, v18, v18 ; 101A2512 v_mad_f32 v13, v19, v19, v13 ; D282000D 04362713 v_mad_f32 v13, v20, v20, v13 ; D282000D 04362914 v_rsq_clamp_f32_e32 v23, v13 ; 7E2E590D v_mov_b32_e32 v13, 1.0 ; 7E1A02F2 v_cmpx_le_f32_e32 vcc, 0, v14 ; 7C261C80 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v14, 1.0 ; 7E1C02F2 v_mov_b32_e32 v15, 1.0 ; 7E1E02F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[26:27], 0, s22 ; D10A001A 00002C80 v_mov_b32_e32 v16, s23 ; 7E200217 v_mov_b32_e32 v17, s24 ; 7E220218 s_and_saveexec_b64 s[22:23], s[26:27] ; BE96241A s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 v_interp_p1_f32 v13, v0, 3, 4, [m0] ; C8341300 s_buffer_load_dword s36, s[12:15], 0x6 ; C2120D06 s_buffer_load_dword s37, s[12:15], 0x7 ; C2128D07 s_buffer_load_dword s38, s[12:15], 0x4 ; C2130D04 s_buffer_load_dword s39, s[12:15], 0x5 ; C2138D05 v_interp_p2_f32 v13, [v13], v1, 3, 4, [m0] ; C8351301 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_rcp_f32_e32 v13, v13 ; 7E1A550D v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v0, v0, 0, 4, [m0] ; C8001000 v_interp_p2_f32 v0, [v0], v1, 0, 4, [m0] ; C8011001 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mul_f32_e32 v1, v14, v13 ; 10021B0E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s37 ; 7E1A0225 v_mad_f32 v13, s38, v0, v13 ; D282000D 04360026 v_mov_b32_e32 v0, s36 ; 7E000224 v_mad_f32 v14, s39, v1, v0 ; D282000E 04020227 image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[24:31], s[32:35] ; F0800700 01060D0D s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[22:23] ; 88FE167E v_min_f32_e32 v0, 0x7f7fffff, v21 ; 1E002AFF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v22 ; 1E022CFF 7F7FFFFF v_mul_f32_e32 v9, v1, v9 ; 10121301 v_mul_f32_e32 v10, v1, v10 ; 10141501 v_mul_f32_e32 v1, v1, v12 ; 10021901 v_min_f32_e32 v12, 0x7f7fffff, v23 ; 1E182EFF 7F7FFFFF v_mul_f32_e32 v18, v12, v18 ; 1024250C v_mul_f32_e32 v19, v12, v19 ; 1026270C v_mul_f32_e32 v12, v12, v20 ; 1018290C v_sub_f32_e64 v20, 1.0, s20 ; D2080014 000028F2 v_sub_f32_e64 v21, 1.0, s21 ; D2080015 00002AF2 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 v_sub_f32_e64 v22, 1.0, s19 ; D2080016 000026F2 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800700 00661702 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v20, v23, v20 ; 10282917 v_mul_f32_e32 v21, v24, v21 ; 102A2B18 v_mul_f32_e32 v22, v25, v22 ; 102C2D19 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[20:23] ; F0800700 00A81702 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v17, v23, s18 ; D2820002 004A2F11 v_mad_f32 v3, v24, v17, s17 ; D2820003 00462318 v_mad_f32 v17, v25, v17, s16 ; D2820011 00422319 v_mul_f32_e32 v23, v0, v4 ; 102E0900 v_mul_f32_e32 v23, v23, v18 ; 102E2517 v_mul_f32_e32 v24, v0, v5 ; 10300B00 v_mad_f32 v23, v19, v24, v23 ; D2820017 045E3113 v_mul_f32_e32 v24, v0, v7 ; 10300F00 v_mad_f32 v23, v12, v24, v23 ; D2820017 045E310C v_mul_f32_e32 v24, v18, v23 ; 10302F12 v_mad_f32 v24, v23, v18, v24 ; D2820018 04622517 v_mad_f32 v4, -v4, v0, v24 ; D2820004 24620104 v_mul_f32_e32 v24, v19, v23 ; 10302F13 v_mad_f32 v24, v23, v19, v24 ; D2820018 04622717 v_mad_f32 v5, -v5, v0, v24 ; D2820005 24620105 v_mul_f32_e32 v24, v12, v23 ; 10302F0C v_mad_f32 v23, v23, v12, v24 ; D2820017 04621917 v_mad_f32 v0, -v7, v0, v23 ; D2820000 245E0107 v_mad_f32 v7, v16, v20, s9 ; D2820007 00262910 v_mad_f32 v20, v21, v16, s10 ; D2820014 002A2115 v_mad_f32 v16, v22, v16, s11 ; D2820010 002E2116 v_max_f32_e32 v11, 0, v11 ; 20161680 v_mov_b32_e32 v21, 0xb58637bd ; 7E2A02FF B58637BD v_add_f32_e32 v22, v21, v11 ; 062C1715 v_mov_b32_e32 v23, 0x7fffffff ; 7E2E02FF 7FFFFFFF v_and_b32_e32 v11, v11, v23 ; 36162F0B v_log_f32_e32 v11, v11 ; 7E164F0B v_cmp_gt_f32_e32 vcc, 0, v22 ; 7C082C80 v_mul_f32_e32 v18, v9, v18 ; 10242509 v_mad_f32 v18, v19, v10, v18 ; D2820012 044A1513 v_mul_legacy_f32_e32 v11, s8, v11 ; 0E161608 v_exp_f32_e32 v11, v11 ; 7E164B0B v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mad_f32 v4, v5, v10, v4 ; D2820004 04121505 v_mad_f32 v5, v12, v1, v18 ; D2820005 044A030C v_mad_f32 v0, v0, v1, v4 ; D2820000 04120300 v_add_f32_e64 v1, 0, v5 clamp ; D2060801 00020A80 v_mul_f32_e32 v4, v1, v7 ; 10080F01 v_mul_f32_e32 v5, v1, v20 ; 100A2901 v_mul_f32_e32 v7, v1, v16 ; 100E2101 v_add_f32_e32 v1, v21, v1 ; 06020315 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v9, v0, v23 ; 36122F00 v_log_f32_e32 v9, v9 ; 7E124F09 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v4, 0, vcc ; D2000001 01A90104 v_cndmask_b32_e64 v4, v5, 0, vcc ; D2000004 01A90105 v_cndmask_b32_e64 v5, v7, 0, vcc ; D2000005 01A90107 v_add_f32_e32 v0, v21, v0 ; 06000115 v_mul_legacy_f32_e32 v7, s3, v9 ; 0E0E1203 v_mov_b32_e32 v9, 0x41000000 ; 7E1202FF 41000000 v_add_f32_e32 v9, s3, v9 ; 06121203 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mul_f32_e32 v7, 0x3d22f983, v7 ; 100E0EFF 3D22F983 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_mul_f32_e32 v7, v7, v17 ; 100E2307 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v2, 0, vcc ; D2000000 01A90102 v_cndmask_b32_e64 v2, v3, 0, vcc ; D2000002 01A90103 v_cndmask_b32_e64 v3, v7, 0, vcc ; D2000003 01A90107 v_add_f32_e32 v0, v1, v0 ; 06000101 v_add_f32_e32 v1, v4, v2 ; 06020504 v_add_f32_e32 v2, v5, v3 ; 06040705 v_mul_f32_e32 v0, v0, v11 ; 10001700 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mul_f32_e32 v1, v1, v14 ; 10021D01 v_mul_f32_e32 v2, v2, v15 ; 10041F02 v_mul_f32_e32 v0, s2, v0 ; 10000002 v_mul_f32_e32 v1, s1, v1 ; 10020201 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mul_f32_e32 v2, v2, v6 ; 10040D02 v_xor_b32_e32 v3, 0x80000000, v8 ; 3A0610FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cndmask_b32_e64 v1, 0, v2, vcc ; D2000001 01AA0480 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 28 Code Size: 1240 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..15] DCL TEMP[0..4] DCL TEMP[5], LOCAL DCL TEMP[6..7] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.8000, 3.0000, -1.0000, 0.8581} IMM[2] FLT32 { -0.0000, 0.3000, 0.5900, 0.1100} IMM[3] FLT32 { -1.0233, 1.0233, 0.8862, 0.0000} IMM[4] FLT32 { -0.8581, 0.2477, 0.4290, 0.0000} IMM[5] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[2], SAMP[2], 2D 1: DP3 TEMP[0].w, TEMP[0], IMM[2].yzww 2: LRP TEMP[1].xyz, CONST[15].xxxx, TEMP[0].wwww, TEMP[0] 3: ADD TEMP[1].xyz, TEMP[1], TEMP[1] 4: MAX TEMP[2].xyz, |TEMP[1]|, -IMM[2].xxxx 5: TEX TEMP[1], IN[2], SAMP[3], 2D 6: LRP TEMP[3].xyz, CONST[14].yyyy, TEMP[1], TEMP[0] 7: DP3 TEMP[0].w, TEMP[3], IMM[2].yzww 8: LRP TEMP[1].xyz, CONST[14].zzzz, TEMP[0].wwww, TEMP[3] 9: TEX TEMP[4], IN[2], SAMP[0], 2D 10: MAD TEMP[4].xyz, TEMP[4], IMM[0].xxxx, IMM[0].yyyy 11: DP3 TEMP[5].x, IN[3], IN[3] 12: RSQ TEMP[5].x, TEMP[5].xxxx 13: MIN TEMP[5].x, IMM[5].xxxx, TEMP[5].xxxx 14: MUL TEMP[6].xyz, IN[3], TEMP[5].xxxx 15: DP3 TEMP[0].w, TEMP[4], TEMP[6] 16: MAX TEMP[1].w, TEMP[0].wwww, IMM[0].wwww 17: MOV_SAT TEMP[0].w, TEMP[1].wwww 18: ADD TEMP[1].w, -TEMP[1].wwww, IMM[0].zzzz 19: ADD TEMP[2].w, TEMP[0].wwww, IMM[2].xxxx 20: POW TEMP[3].w, |TEMP[0].wwww|, CONST[14].wwww 21: CMP TEMP[0].w, TEMP[2].wwww, IMM[0].wwww, TEMP[3].wwww 22: MUL TEMP[7].xyz, TEMP[0].wwww, CONST[13] 23: MAD TEMP[1].xyz, TEMP[1], TEMP[7], -TEMP[3] 24: TEX TEMP[7], IN[2], SAMP[1], 2D 25: MUL TEMP[0].w, TEMP[0].wwww, TEMP[7].yyyy 26: MAD TEMP[1].xyz, TEMP[0].wwww, TEMP[1], TEMP[3] 27: MAD TEMP[2].xyz, TEMP[2], TEMP[2], -TEMP[1] 28: DP3 TEMP[5].x, TEMP[4], TEMP[4] 29: RSQ TEMP[5].x, TEMP[5].xxxx 30: MIN TEMP[5].x, IMM[5].xxxx, TEMP[5].xxxx 31: MUL TEMP[3].xyz, TEMP[4], TEMP[5].xxxx 32: DP3 TEMP[0].w, TEMP[3], TEMP[6] 33: MUL TEMP[7].xyw, TEMP[0].wwww, TEMP[3].xyzz 34: MAD TEMP[6].xyz, TEMP[7].xyww, IMM[0].xxxx, -TEMP[6] 35: DP3 TEMP[0].w, TEMP[6], TEMP[4] 36: MAX TEMP[2].w, TEMP[0].wwww, IMM[0].wwww 37: MIN TEMP[0].w, TEMP[2].wwww, IMM[1].xxxx 38: MUL TEMP[0].w, TEMP[0].wwww, CONST[15].yyyy 39: MUL TEMP[0].w, TEMP[0].wwww, TEMP[7].zzzz 40: MAD TEMP[1].xyz, TEMP[0].wwww, TEMP[2], TEMP[1] 41: MUL TEMP[0].w, |TEMP[1].wwww|, |TEMP[1].wwww| 42: MUL TEMP[0].w, TEMP[0].wwww, |TEMP[1].wwww| 43: ADD TEMP[1].w, |TEMP[1].wwww|, IMM[2].xxxx 44: MUL TEMP[0].w, TEMP[0].wwww, CONST[14].xxxx 45: MUL TEMP[0].w, TEMP[7].zzzz, TEMP[0].wwww 46: MUL TEMP[0].xyz, TEMP[0], TEMP[0].wwww 47: CMP TEMP[0].xyz, TEMP[1].wwww, IMM[0].wwww, TEMP[0] 48: ADD TEMP[0].xyz, TEMP[1], TEMP[0] 49: DP3 TEMP[0].w, TEMP[0], IMM[2].yzww 50: LRP TEMP[1].xyz, CONST[15].zzzz, TEMP[0].wwww, TEMP[0] 51: MUL TEMP[0].xyz, TEMP[1], CONST[15].wwww 52: MOV TEMP[1].z, IMM[0].zzzz 53: ADD TEMP[1].xyz, TEMP[1].zzzz, -CONST[0] 54: MUL TEMP[0].xyz, TEMP[0], TEMP[1] 55: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 56: DP3 TEMP[5].x, IN[1], IN[1] 57: RSQ TEMP[5].x, TEMP[5].xxxx 58: MIN TEMP[5].x, IMM[5].xxxx, TEMP[5].xxxx 59: MUL TEMP[1].xyz, IN[1], TEMP[5].xxxx 60: DP3 TEMP[5].x, IN[0], IN[0] 61: RSQ TEMP[5].x, TEMP[5].xxxx 62: MIN TEMP[5].x, IMM[5].xxxx, TEMP[5].xxxx 63: MUL TEMP[2].xyz, IN[0], TEMP[5].xxxx 64: MUL TEMP[4].xyz, TEMP[1].zxyw, TEMP[2].yzxw 65: MAD TEMP[4].xyz, TEMP[1].yzxw, TEMP[2].zxyw, -TEMP[4] 66: DP3 TEMP[1].y, TEMP[1], TEMP[3] 67: DP3 TEMP[1].z, TEMP[2], TEMP[3] 68: MUL TEMP[2].xyz, TEMP[4], IN[1].wwww 69: DP3 TEMP[1].x, TEMP[2], TEMP[3] 70: MUL TEMP[2].xz, TEMP[1], TEMP[1].yyyy 71: MUL TEMP[3].xyz, TEMP[1], TEMP[1].xyxw 72: MUL TEMP[4].xyz, TEMP[1], IMM[3].xyxw 73: MAD TEMP[2].w, TEMP[1].zzzz, TEMP[1].zzzz, -TEMP[3].xxxx 74: MAD TEMP[2].y, TEMP[3].yyyy, IMM[1].yyyy, IMM[1].zzzz 75: MUL TEMP[4].w, TEMP[3].zzzz, IMM[1].wwww 76: MUL TEMP[1], TEMP[2], IMM[4].xyxz 77: DP4 TEMP[0].w, CONST[8], TEMP[1] 78: DP4 TEMP[2].x, CONST[7], TEMP[4] 79: MOV TEMP[2].z, IMM[3].zzzz 80: MAD TEMP[2].x, CONST[6].xxxx, TEMP[2].zzzz, TEMP[2].xxxx 81: ADD TEMP[3].x, TEMP[0].wwww, TEMP[2].xxxx 82: DP4 TEMP[0].w, CONST[10], TEMP[1] 83: DP4 TEMP[1].x, CONST[12], TEMP[1] 84: DP4 TEMP[1].y, CONST[9], TEMP[4] 85: DP4 TEMP[1].z, CONST[11], TEMP[4] 86: MAD TEMP[1].z, CONST[6].zzzz, TEMP[2].zzzz, TEMP[1].zzzz 87: ADD TEMP[3].z, TEMP[1].xxxx, TEMP[1].zzzz 88: MAD TEMP[1].x, CONST[6].yyyy, TEMP[2].zzzz, TEMP[1].yyyy 89: ADD TEMP[3].y, TEMP[0].wwww, TEMP[1].xxxx 90: MAX TEMP[1].xyz, TEMP[3], IMM[0].wwww 91: MUL OUT[0].xyz, TEMP[0], TEMP[1] 92: MOV OUT[0].w, IMM[0].wwww 93: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 252) %69 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %70 = load <32 x i8>, <32 x i8> addrspace(2)* %69, align 32, !tbaa !0 %71 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %74 = bitcast <8 x i32> addrspace(2)* %73 to <32 x i8> addrspace(2)* %75 = load <32 x i8>, <32 x i8> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %77 = bitcast <4 x i32> addrspace(2)* %76 to <16 x i8> addrspace(2)* %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %80 = bitcast <8 x i32> addrspace(2)* %79 to <32 x i8> addrspace(2)* %81 = load <32 x i8>, <32 x i8> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %83 = bitcast <4 x i32> addrspace(2)* %82 to <16 x i8> addrspace(2)* %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %86 = bitcast <8 x i32> addrspace(2)* %85 to <32 x i8> addrspace(2)* %87 = load <32 x i8>, <32 x i8> addrspace(2)* %86, align 32, !tbaa !0 %88 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %89 = bitcast <4 x i32> addrspace(2)* %88 to <16 x i8> addrspace(2)* %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %103 = bitcast float %98 to i32 %104 = bitcast float %99 to i32 %105 = insertelement <2 x i32> undef, i32 %103, i32 0 %106 = insertelement <2 x i32> %105, i32 %104, i32 1 %107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %106, <32 x i8> %81, <16 x i8> %84, i32 2) %108 = extractelement <4 x float> %107, i32 0 %109 = extractelement <4 x float> %107, i32 1 %110 = extractelement <4 x float> %107, i32 2 %111 = fmul float %108, 0x3FD3333340000000 %112 = fmul float %109, 0x3FE2E147A0000000 %113 = fadd float %112, %111 %114 = fmul float %110, 0x3FBC28F5C0000000 %115 = fadd float %113, %114 %116 = call float @llvm.AMDGPU.lrp(float %65, float %115, float %108) %117 = call float @llvm.AMDGPU.lrp(float %65, float %115, float %109) %118 = call float @llvm.AMDGPU.lrp(float %65, float %115, float %110) %119 = fadd float %116, %116 %120 = fadd float %117, %117 %121 = fadd float %118, %118 %122 = call float @fabs(float %119) %123 = call float @llvm.maxnum.f32(float %122, float 0x3EB0C6F7A0000000) %124 = call float @fabs(float %120) %125 = call float @llvm.maxnum.f32(float %124, float 0x3EB0C6F7A0000000) %126 = call float @fabs(float %121) %127 = call float @llvm.maxnum.f32(float %126, float 0x3EB0C6F7A0000000) %128 = bitcast float %98 to i32 %129 = bitcast float %99 to i32 %130 = insertelement <2 x i32> undef, i32 %128, i32 0 %131 = insertelement <2 x i32> %130, i32 %129, i32 1 %132 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %131, <32 x i8> %87, <16 x i8> %90, i32 2) %133 = extractelement <4 x float> %132, i32 0 %134 = extractelement <4 x float> %132, i32 1 %135 = extractelement <4 x float> %132, i32 2 %136 = call float @llvm.AMDGPU.lrp(float %62, float %133, float %108) %137 = call float @llvm.AMDGPU.lrp(float %62, float %134, float %109) %138 = call float @llvm.AMDGPU.lrp(float %62, float %135, float %110) %139 = fmul float %136, 0x3FD3333340000000 %140 = fmul float %137, 0x3FE2E147A0000000 %141 = fadd float %140, %139 %142 = fmul float %138, 0x3FBC28F5C0000000 %143 = fadd float %141, %142 %144 = call float @llvm.AMDGPU.lrp(float %63, float %143, float %136) %145 = call float @llvm.AMDGPU.lrp(float %63, float %143, float %137) %146 = call float @llvm.AMDGPU.lrp(float %63, float %143, float %138) %147 = bitcast float %98 to i32 %148 = bitcast float %99 to i32 %149 = insertelement <2 x i32> undef, i32 %147, i32 0 %150 = insertelement <2 x i32> %149, i32 %148, i32 1 %151 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %150, <32 x i8> %70, <16 x i8> %72, i32 2) %152 = extractelement <4 x float> %151, i32 0 %153 = extractelement <4 x float> %151, i32 1 %154 = extractelement <4 x float> %151, i32 2 %155 = fmul float %152, 2.000000e+00 %156 = fadd float %155, -1.000000e+00 %157 = fmul float %153, 2.000000e+00 %158 = fadd float %157, -1.000000e+00 %159 = fmul float %154, 2.000000e+00 %160 = fadd float %159, -1.000000e+00 %161 = fmul float %100, %100 %162 = fmul float %101, %101 %163 = fadd float %162, %161 %164 = fmul float %102, %102 %165 = fadd float %163, %164 %166 = call float @llvm.AMDGPU.rsq.clamped.f32(float %165) %167 = call float @llvm.minnum.f32(float %166, float 0x47EFFFFFE0000000) %168 = fmul float %100, %167 %169 = fmul float %101, %167 %170 = fmul float %102, %167 %171 = fmul float %156, %168 %172 = fmul float %158, %169 %173 = fadd float %172, %171 %174 = fmul float %160, %170 %175 = fadd float %173, %174 %176 = call float @llvm.maxnum.f32(float %175, float 0.000000e+00) %177 = call float @llvm.AMDIL.clamp.(float %176, float 0.000000e+00, float 1.000000e+00) %178 = fsub float 1.000000e+00, %176 %179 = fadd float %177, 0xBEB0C6F7A0000000 %180 = call float @fabs(float %177) %181 = call float @llvm.pow.f32(float %180, float %64) %182 = call float @llvm.AMDGPU.cndlt(float %179, float 0.000000e+00, float %181) %183 = fmul float %182, %58 %184 = fmul float %182, %59 %185 = fmul float %182, %60 %186 = fmul float %144, %183 %187 = fsub float %186, %136 %188 = fmul float %145, %184 %189 = fsub float %188, %137 %190 = fmul float %146, %185 %191 = fsub float %190, %138 %192 = bitcast float %98 to i32 %193 = bitcast float %99 to i32 %194 = insertelement <2 x i32> undef, i32 %192, i32 0 %195 = insertelement <2 x i32> %194, i32 %193, i32 1 %196 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %195, <32 x i8> %75, <16 x i8> %78, i32 2) %197 = extractelement <4 x float> %196, i32 1 %198 = extractelement <4 x float> %196, i32 2 %199 = fmul float %182, %197 %200 = fmul float %199, %187 %201 = fadd float %200, %136 %202 = fmul float %199, %189 %203 = fadd float %202, %137 %204 = fmul float %199, %191 %205 = fadd float %204, %138 %206 = fmul float %123, %123 %207 = fsub float %206, %201 %208 = fmul float %125, %125 %209 = fsub float %208, %203 %210 = fmul float %127, %127 %211 = fsub float %210, %205 %212 = fmul float %156, %156 %213 = fmul float %158, %158 %214 = fadd float %213, %212 %215 = fmul float %160, %160 %216 = fadd float %214, %215 %217 = call float @llvm.AMDGPU.rsq.clamped.f32(float %216) %218 = call float @llvm.minnum.f32(float %217, float 0x47EFFFFFE0000000) %219 = fmul float %156, %218 %220 = fmul float %158, %218 %221 = fmul float %160, %218 %222 = fmul float %219, %168 %223 = fmul float %220, %169 %224 = fadd float %223, %222 %225 = fmul float %221, %170 %226 = fadd float %224, %225 %227 = fmul float %226, %219 %228 = fmul float %226, %220 %229 = fmul float %226, %221 %230 = fmul float %227, 2.000000e+00 %231 = fsub float %230, %168 %232 = fmul float %228, 2.000000e+00 %233 = fsub float %232, %169 %234 = fmul float %229, 2.000000e+00 %235 = fsub float %234, %170 %236 = fmul float %231, %156 %237 = fmul float %233, %158 %238 = fadd float %237, %236 %239 = fmul float %235, %160 %240 = fadd float %238, %239 %241 = call float @llvm.maxnum.f32(float %240, float 0.000000e+00) %242 = call float @llvm.minnum.f32(float %241, float 0x3FE99999A0000000) %243 = fmul float %242, %66 %244 = fmul float %243, %198 %245 = fmul float %244, %207 %246 = fadd float %245, %201 %247 = fmul float %244, %209 %248 = fadd float %247, %203 %249 = fmul float %244, %211 %250 = fadd float %249, %205 %251 = call float @fabs(float %178) %252 = call float @fabs(float %178) %253 = fmul float %251, %252 %254 = call float @fabs(float %178) %255 = fmul float %253, %254 %256 = call float @fabs(float %178) %257 = fadd float %256, 0xBEB0C6F7A0000000 %258 = fmul float %255, %61 %259 = fmul float %198, %258 %260 = fmul float %108, %259 %261 = fmul float %109, %259 %262 = fmul float %110, %259 %263 = call float @llvm.AMDGPU.cndlt(float %257, float 0.000000e+00, float %260) %264 = call float @llvm.AMDGPU.cndlt(float %257, float 0.000000e+00, float %261) %265 = call float @llvm.AMDGPU.cndlt(float %257, float 0.000000e+00, float %262) %266 = fadd float %246, %263 %267 = fadd float %248, %264 %268 = fadd float %250, %265 %269 = fmul float %266, 0x3FD3333340000000 %270 = fmul float %267, 0x3FE2E147A0000000 %271 = fadd float %270, %269 %272 = fmul float %268, 0x3FBC28F5C0000000 %273 = fadd float %271, %272 %274 = call float @llvm.AMDGPU.lrp(float %67, float %273, float %266) %275 = call float @llvm.AMDGPU.lrp(float %67, float %273, float %267) %276 = call float @llvm.AMDGPU.lrp(float %67, float %273, float %268) %277 = fmul float %274, %68 %278 = fmul float %275, %68 %279 = fmul float %276, %68 %280 = fsub float 1.000000e+00, %24 %281 = fsub float 1.000000e+00, %25 %282 = fsub float 1.000000e+00, %26 %283 = fmul float %277, %280 %284 = fmul float %278, %281 %285 = fmul float %279, %282 %286 = fmul float %283, %30 %287 = fadd float %286, %27 %288 = fmul float %284, %30 %289 = fadd float %288, %28 %290 = fmul float %285, %30 %291 = fadd float %290, %29 %292 = fmul float %94, %94 %293 = fmul float %95, %95 %294 = fadd float %293, %292 %295 = fmul float %96, %96 %296 = fadd float %294, %295 %297 = call float @llvm.AMDGPU.rsq.clamped.f32(float %296) %298 = call float @llvm.minnum.f32(float %297, float 0x47EFFFFFE0000000) %299 = fmul float %94, %298 %300 = fmul float %95, %298 %301 = fmul float %96, %298 %302 = fmul float %91, %91 %303 = fmul float %92, %92 %304 = fadd float %303, %302 %305 = fmul float %93, %93 %306 = fadd float %304, %305 %307 = call float @llvm.AMDGPU.rsq.clamped.f32(float %306) %308 = call float @llvm.minnum.f32(float %307, float 0x47EFFFFFE0000000) %309 = fmul float %91, %308 %310 = fmul float %92, %308 %311 = fmul float %93, %308 %312 = fmul float %301, %310 %313 = fmul float %299, %311 %314 = fmul float %300, %309 %315 = fmul float %300, %311 %316 = fsub float %315, %312 %317 = fmul float %301, %309 %318 = fsub float %317, %313 %319 = fmul float %299, %310 %320 = fsub float %319, %314 %321 = fmul float %299, %219 %322 = fmul float %300, %220 %323 = fadd float %322, %321 %324 = fmul float %301, %221 %325 = fadd float %323, %324 %326 = fmul float %309, %219 %327 = fmul float %310, %220 %328 = fadd float %327, %326 %329 = fmul float %311, %221 %330 = fadd float %328, %329 %331 = fmul float %316, %97 %332 = fmul float %318, %97 %333 = fmul float %320, %97 %334 = fmul float %331, %219 %335 = fmul float %332, %220 %336 = fadd float %335, %334 %337 = fmul float %333, %221 %338 = fadd float %336, %337 %339 = fmul float %338, %325 %340 = fmul float %330, %325 %341 = fmul float %338, %338 %342 = fmul float %325, %325 %343 = fmul float %330, %338 %344 = fmul float %338, 0xBFF05F8CE0000000 %345 = fmul float %325, 0x3FF05F8CE0000000 %346 = fmul float %330, 0xBFF05F8CE0000000 %347 = fmul float %330, %330 %348 = fsub float %347, %341 %349 = fmul float %342, 3.000000e+00 %350 = fadd float %349, -1.000000e+00 %351 = fmul float %343, 0x3FEB756F20000000 %352 = fmul float %339, 0xBFEB756F20000000 %353 = fmul float %350, 0x3FCFB4E7C0000000 %354 = fmul float %340, 0xBFEB756F20000000 %355 = fmul float %348, 0x3FDB756F20000000 %356 = fmul float %38, %352 %357 = fmul float %39, %353 %358 = fadd float %356, %357 %359 = fmul float %40, %354 %360 = fadd float %358, %359 %361 = fmul float %41, %355 %362 = fadd float %360, %361 %363 = fmul float %34, %344 %364 = fmul float %35, %345 %365 = fadd float %363, %364 %366 = fmul float %36, %346 %367 = fadd float %365, %366 %368 = fmul float %37, %351 %369 = fadd float %367, %368 %370 = fmul float %31, 0x3FEC5BFA00000000 %371 = fadd float %370, %369 %372 = fadd float %362, %371 %373 = fmul float %46, %352 %374 = fmul float %47, %353 %375 = fadd float %373, %374 %376 = fmul float %48, %354 %377 = fadd float %375, %376 %378 = fmul float %49, %355 %379 = fadd float %377, %378 %380 = fmul float %54, %352 %381 = fmul float %55, %353 %382 = fadd float %380, %381 %383 = fmul float %56, %354 %384 = fadd float %382, %383 %385 = fmul float %57, %355 %386 = fadd float %384, %385 %387 = fmul float %42, %344 %388 = fmul float %43, %345 %389 = fadd float %387, %388 %390 = fmul float %44, %346 %391 = fadd float %389, %390 %392 = fmul float %45, %351 %393 = fadd float %391, %392 %394 = fmul float %50, %344 %395 = fmul float %51, %345 %396 = fadd float %394, %395 %397 = fmul float %52, %346 %398 = fadd float %396, %397 %399 = fmul float %53, %351 %400 = fadd float %398, %399 %401 = fmul float %33, 0x3FEC5BFA00000000 %402 = fadd float %401, %400 %403 = fadd float %386, %402 %404 = fmul float %32, 0x3FEC5BFA00000000 %405 = fadd float %404, %393 %406 = fadd float %379, %405 %407 = call float @llvm.maxnum.f32(float %372, float 0.000000e+00) %408 = call float @llvm.maxnum.f32(float %406, float 0.000000e+00) %409 = call float @llvm.maxnum.f32(float %403, float 0.000000e+00) %410 = fmul float %287, %407 %411 = fmul float %289, %408 %412 = fmul float %291, %409 %413 = call i32 @llvm.SI.packf16(float %410, float %411) %414 = bitcast i32 %413 to float %415 = call i32 @llvm.SI.packf16(float %412, float 0.000000e+00) %416 = bitcast i32 %415 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %414, float %416, float %414, float %416) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s44, s[0:3], 0x39 ; C2160139 s_buffer_load_dword s45, s[0:3], 0x3a ; C216813A v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v1, 1.0, s44 ; D2080001 000058F2 s_load_dwordx4 s[32:35], s[4:5], 0x8 ; C0900508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[24:31], s[6:7], 0x18 ; C0CC0718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[36:43], s[32:35] ; F0800700 01090D09 image_sample v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[24:31], s[20:23] ; F0800700 00A61009 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v19, v13, v1 ; 1026030D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, s44, v16, v19 ; D2820010 044E202C v_mul_f32_e32 v19, v14, v1 ; 1026030E v_mad_f32 v17, s44, v17, v19 ; D2820011 044E222C v_mul_f32_e32 v1, v15, v1 ; 1002030F v_mad_f32 v1, s44, v18, v1 ; D2820001 0406242C s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800700 00431209 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v18, 2.0, v18, -1.0 ; D2820012 03CE24F4 s_buffer_load_dword s4, s[0:3], 0x3c ; C202013C v_mad_f32 v19, 2.0, v19, -1.0 ; D2820013 03CE26F4 v_mad_f32 v20, 2.0, v20, -1.0 ; D2820014 03CE28F4 v_mul_f32_e32 v21, 0x3e99999a, v13 ; 102A1AFF 3E99999A v_mov_b32_e32 v22, 0x3f170a3d ; 7E2C02FF 3F170A3D v_mad_f32 v21, v14, v22, v21 ; D2820015 04562D0E v_mov_b32_e32 v23, 0x3de147ae ; 7E2E02FF 3DE147AE v_mad_f32 v21, v15, v23, v21 ; D2820015 04562F0F s_buffer_load_dword s5, s[0:3], 0x3d ; C202813D s_buffer_load_dword s6, s[0:3], 0x3e ; C203013E s_buffer_load_dword s7, s[0:3], 0x3f ; C203813F s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v24, 1.0, s4 ; D2080018 000008F2 v_mul_f32_e32 v25, v13, v24 ; 1032310D v_mad_f32 v25, s4, v21, v25 ; D2820019 04662A04 v_mul_f32_e32 v26, v14, v24 ; 1034310E v_mad_f32 v26, s4, v21, v26 ; D282001A 046A2A04 v_mul_f32_e32 v27, v15, v24 ; 1036310F v_mad_f32 v27, s4, v21, v27 ; D282001B 046E2A04 v_mad_f32 v25, v24, v13, v25 ; D2820019 04661B18 v_mul_f32_e32 v28, v11, v11 ; 1038170B v_mad_f32 v28, v12, v12, v28 ; D282001C 0472190C v_mad_f32 v28, v0, v0, v28 ; D282001C 04720100 v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_mad_f32 v26, v24, v14, v26 ; D282001A 046A1D18 v_mad_f32 v24, v24, v15, v27 ; D2820018 046E1F18 image_sample v[9:10], 6, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[24:31], s[20:23] ; F0800600 00A60909 v_min_f32_e32 v27, 0x7f7fffff, v28 ; 1E3638FF 7F7FFFFF s_buffer_load_dword s8, s[0:3], 0x38 ; C2040138 v_mul_f32_e32 v28, v27, v11 ; 1038171B v_mul_f32_e32 v29, v27, v12 ; 103A191B v_mul_f32_e32 v30, v28, v18 ; 103C251C v_mad_f32 v30, v19, v29, v30 ; D282001E 047A3B13 v_mul_f32_e32 v31, v27, v0 ; 103E011B v_mad_f32 v30, v20, v31, v30 ; D282001E 047A3F14 v_max_f32_e32 v30, 0, v30 ; 203C3C80 v_sub_f32_e32 v32, 1.0, v30 ; 08403CF2 v_mul_f32_e64 v33, |v32|, |v32| ; D2100321 00024120 v_mul_f32_e64 v33, v33, |v32| ; D2100221 00024121 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v33, s8, v33 ; 10424208 v_mul_f32_e32 v33, v33, v10 ; 10421521 v_mul_f32_e32 v13, v33, v13 ; 101A1B21 v_mul_f32_e32 v14, v33, v14 ; 101C1D21 v_mul_f32_e32 v15, v33, v15 ; 101E1F21 v_mul_f32_e32 v33, 0x3e99999a, v16 ; 104220FF 3E99999A v_mad_f32 v33, v17, v22, v33 ; D2820021 04862D11 v_mad_f32 v33, v1, v23, v33 ; D2820021 04862F01 v_sub_f32_e64 v34, 1.0, s45 ; D2080022 00005AF2 v_mul_f32_e32 v35, v16, v34 ; 10464510 v_mad_f32 v35, s45, v33, v35 ; D2820023 048E422D v_mul_f32_e32 v36, v17, v34 ; 10484511 v_mad_f32 v36, s45, v33, v36 ; D2820024 0492422D v_mul_f32_e32 v34, v1, v34 ; 10444501 v_mad_f32 v33, s45, v33, v34 ; D2820021 048A422D s_buffer_load_dword s8, s[0:3], 0x3b ; C204013B v_add_f32_e64 v30, 0, v30 clamp ; D206081E 00023C80 s_buffer_load_dword s9, s[0:3], 0x34 ; C2048134 v_and_b32_e32 v34, 0x7fffffff, v30 ; 36443CFF 7FFFFFFF v_log_f32_e32 v34, v34 ; 7E444F22 s_buffer_load_dword s10, s[0:3], 0x35 ; C2050135 s_buffer_load_dword s11, s[0:3], 0x36 ; C2058136 v_mov_b32_e32 v37, 0xb58637bd ; 7E4A02FF B58637BD v_add_f32_e32 v30, v37, v30 ; 063C3D25 v_cmp_gt_f32_e32 vcc, 0, v30 ; 7C083C80 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v30, s8, v34 ; 0E3C4408 v_exp_f32_e32 v30, v30 ; 7E3C4B1E v_cndmask_b32_e64 v30, v30, 0, vcc ; D200001E 01A9011E v_mul_f32_e32 v34, s9, v30 ; 10443C09 v_mad_f32 v34, v35, v34, -v16 ; D2820022 84424523 v_mul_f32_e32 v35, s10, v30 ; 10463C0A v_mad_f32 v35, v36, v35, -v17 ; D2820023 84464724 v_mul_f32_e32 v36, s11, v30 ; 10483C0B v_mad_f32 v33, v33, v36, -v1 ; D2820021 84064921 v_mul_f32_e32 v9, v9, v30 ; 10123D09 v_mul_f32_e32 v30, v18, v18 ; 103C2512 v_mad_f32 v30, v19, v19, v30 ; D282001E 047A2713 v_mad_f32 v30, v20, v20, v30 ; D282001E 047A2914 v_rsq_clamp_f32_e32 v30, v30 ; 7E3C591E v_mad_f32 v16, v9, v34, v16 ; D2820010 04424509 v_mad_f32 v17, v9, v35, v17 ; D2820011 04464709 v_mad_f32 v1, v9, v33, v1 ; D2820001 04064309 v_min_f32_e32 v9, 0x7f7fffff, v30 ; 1E123CFF 7F7FFFFF v_mul_f32_e32 v30, v9, v18 ; 103C2509 v_mul_f32_e32 v28, v28, v30 ; 10383D1C v_mul_f32_e32 v33, v9, v19 ; 10422709 v_mad_f32 v28, v33, v29, v28 ; D282001C 04723B21 v_mul_f32_e32 v9, v9, v20 ; 10122909 v_mad_f32 v28, v9, v31, v28 ; D282001C 04723F09 v_mul_f32_e32 v29, v30, v28 ; 103A391E v_mad_f32 v29, v28, v30, v29 ; D282001D 04763D1C v_mad_f32 v11, -v11, v27, v29 ; D282000B 2476370B v_mul_f32_e32 v29, v33, v28 ; 103A3921 v_mad_f32 v29, v28, v33, v29 ; D282001D 0476431C v_mad_f32 v12, -v12, v27, v29 ; D282000C 2476370C v_mul_f32_e32 v29, v9, v28 ; 103A3909 v_mad_f32 v28, v28, v9, v29 ; D282001C 0476131C v_mad_f32 v0, -v0, v27, v28 ; D2820000 24723700 v_mul_f32_e32 v11, v18, v11 ; 10161712 v_mad_f32 v11, v12, v19, v11 ; D282000B 042E270C v_mad_f32 v0, v0, v20, v11 ; D2820000 042E2900 v_max_f32_e32 v0, 0, v0 ; 20000080 v_min_f32_e32 v0, 0x3f4ccccd, v0 ; 1E0000FF 3F4CCCCD v_mul_f32_e32 v0, s5, v0 ; 10000005 v_mul_f32_e32 v0, v10, v0 ; 1000010A v_mad_f32 v10, s4, v21, v25 ; D282000A 04662A04 v_mad_f32 v11, s4, v21, v26 ; D282000B 046A2A04 v_mad_f32 v12, s4, v21, v24 ; D282000C 04622A04 v_mov_b32_e32 v18, 0x358637bd ; 7E2402FF 358637BD v_max_f32_e64 v10, |v10|, v18 ; D220010A 0002250A v_max_f32_e64 v11, |v11|, v18 ; D220010B 0002250B v_max_f32_e64 v12, |v12|, v18 ; D220010C 0002250C v_add_f32_e64 v18, |v32|, v37 ; D2060112 00024B20 v_cmp_gt_f32_e32 vcc, 0, v18 ; 7C082480 s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_buffer_load_dword s5, s[0:3], 0x31 ; C2028131 s_buffer_load_dword s8, s[0:3], 0x32 ; C2040132 s_buffer_load_dword s9, s[0:3], 0x33 ; C2048133 s_buffer_load_dword s10, s[0:3], 0x0 ; C2050100 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102 s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111 s_buffer_load_dword s15, s[0:3], 0x12 ; C2078112 s_buffer_load_dword s16, s[0:3], 0x13 ; C2080113 s_buffer_load_dword s17, s[0:3], 0x18 ; C2088118 s_buffer_load_dword s18, s[0:3], 0x19 ; C2090119 s_buffer_load_dword s19, s[0:3], 0x1a ; C209811A s_buffer_load_dword s20, s[0:3], 0x1c ; C20A011C s_buffer_load_dword s21, s[0:3], 0x1d ; C20A811D s_buffer_load_dword s22, s[0:3], 0x1e ; C20B011E s_buffer_load_dword s23, s[0:3], 0x1f ; C20B811F s_buffer_load_dword s24, s[0:3], 0x20 ; C20C0120 s_buffer_load_dword s25, s[0:3], 0x21 ; C20C8121 s_buffer_load_dword s26, s[0:3], 0x22 ; C20D0122 s_buffer_load_dword s27, s[0:3], 0x23 ; C20D8123 s_buffer_load_dword s28, s[0:3], 0x24 ; C20E0124 s_buffer_load_dword s29, s[0:3], 0x25 ; C20E8125 s_buffer_load_dword s30, s[0:3], 0x26 ; C20F0126 s_buffer_load_dword s31, s[0:3], 0x27 ; C20F8127 s_buffer_load_dword s32, s[0:3], 0x28 ; C2100128 s_buffer_load_dword s33, s[0:3], 0x29 ; C2108129 s_buffer_load_dword s34, s[0:3], 0x2a ; C211012A s_buffer_load_dword s35, s[0:3], 0x2b ; C211812B s_buffer_load_dword s36, s[0:3], 0x2c ; C212012C s_buffer_load_dword s37, s[0:3], 0x2d ; C212812D s_buffer_load_dword s38, s[0:3], 0x2e ; C213012E s_buffer_load_dword s0, s[0:3], 0x2f ; C200012F v_cndmask_b32_e64 v13, v13, 0, vcc ; D200000D 01A9010D v_cndmask_b32_e64 v14, v14, 0, vcc ; D200000E 01A9010E v_cndmask_b32_e64 v15, v15, 0, vcc ; D200000F 01A9010F v_mad_f32 v10, v10, v10, -v16 ; D282000A 8442150A v_mad_f32 v10, v0, v10, v16 ; D282000A 04421500 v_mad_f32 v11, v11, v11, -v17 ; D282000B 8446170B v_mad_f32 v11, v0, v11, v17 ; D282000B 04461700 v_mad_f32 v12, v12, v12, -v1 ; D282000C 8406190C v_mad_f32 v0, v0, v12, v1 ; D2820000 04061900 v_add_f32_e32 v1, v13, v10 ; 0602150D v_add_f32_e32 v10, v14, v11 ; 0614170E v_add_f32_e32 v0, v15, v0 ; 0600010F v_mul_f32_e32 v11, 0x3e99999a, v1 ; 101602FF 3E99999A v_mad_f32 v11, v10, v22, v11 ; D282000B 042E2D0A v_mad_f32 v11, v0, v23, v11 ; D282000B 042E2F00 v_sub_f32_e64 v12, 1.0, s6 ; D208000C 00000CF2 v_mul_f32_e32 v1, v1, v12 ; 10021901 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_mul_f32_e32 v0, v0, v12 ; 10001900 v_mad_f32 v1, s6, v11, v1 ; D2820001 04061606 v_mad_f32 v10, s6, v11, v10 ; D282000A 042A1606 v_mad_f32 v0, s6, v11, v0 ; D2820000 04021606 v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mul_f32_e32 v10, s7, v10 ; 10141407 v_mul_f32_e32 v0, s7, v0 ; 10000007 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v11, 1.0, s10 ; D208000B 000014F2 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_sub_f32_e64 v11, 1.0, s11 ; D208000B 000016F2 v_mul_f32_e32 v10, v11, v10 ; 1014150B v_sub_f32_e64 v11, 1.0, s12 ; D208000B 000018F2 v_mul_f32_e32 v0, v11, v0 ; 1000010B v_mov_b32_e32 v11, s13 ; 7E16020D v_mul_f32_e32 v12, v5, v5 ; 10180B05 v_mad_f32 v12, v6, v6, v12 ; D282000C 04320D06 v_mad_f32 v12, v7, v7, v12 ; D282000C 04320F07 v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C v_mad_f32 v1, s16, v1, v11 ; D2820001 042E0210 v_mov_b32_e32 v11, s14 ; 7E16020E v_mad_f32 v10, s16, v10, v11 ; D282000A 042E1410 v_mov_b32_e32 v11, s15 ; 7E16020F v_mad_f32 v0, s16, v0, v11 ; D2820000 042E0010 v_min_f32_e32 v11, 0x7f7fffff, v12 ; 1E1618FF 7F7FFFFF v_mul_f32_e32 v12, v2, v2 ; 10180502 v_mad_f32 v12, v3, v3, v12 ; D282000C 04320703 v_mad_f32 v12, v4, v4, v12 ; D282000C 04320904 v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C v_mul_f32_e32 v5, v11, v5 ; 100A0B0B v_mul_f32_e32 v6, v11, v6 ; 100C0D0B v_mul_f32_e32 v7, v11, v7 ; 100E0F0B v_min_f32_e32 v11, 0x7f7fffff, v12 ; 1E1618FF 7F7FFFFF v_mul_f32_e32 v2, v11, v2 ; 1004050B v_mul_f32_e32 v3, v11, v3 ; 1006070B v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mul_f32_e32 v11, v3, v7 ; 10160F03 v_mad_f32 v11, v6, v4, -v11 ; D282000B 842E0906 v_mul_f32_e32 v12, v30, v5 ; 10180B1E v_mad_f32 v12, v6, v33, v12 ; D282000C 04324306 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v5, v3, -v6 ; D2820006 841A0705 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 v_mad_f32 v5, v7, v2, -v5 ; D2820005 84160507 v_mad_f32 v7, v7, v9, v12 ; D2820007 04321307 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mul_f32_e32 v5, v8, v5 ; 100A0B08 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 v_mul_f32_e32 v8, v30, v11 ; 1010171E v_mad_f32 v5, v5, v33, v8 ; D2820005 04224305 v_mad_f32 v5, v6, v9, v5 ; D2820005 04161306 v_mul_f32_e32 v6, v7, v7 ; 100C0F07 v_mov_b32_e32 v8, 0x40400000 ; 7E1002FF 40400000 v_mad_f32 v6, v8, v6, -1.0 ; D2820006 03CE0D08 v_mul_f32_e32 v6, 0x3e7da73e, v6 ; 100C0CFF 3E7DA73E v_mul_f32_e32 v8, s25, v6 ; 10100C19 v_mul_f32_e32 v11, v7, v5 ; 10160B07 v_mov_b32_e32 v12, 0xbf5bab79 ; 7E1802FF BF5BAB79 v_mul_f32_e32 v11, v12, v11 ; 1016170C v_mad_f32 v8, s24, v11, v8 ; D2820008 04221618 v_mul_f32_e32 v13, s33, v6 ; 101A0C21 v_mad_f32 v13, s32, v11, v13 ; D282000D 04361620 v_mul_f32_e32 v6, s5, v6 ; 100C0C05 v_mad_f32 v6, s4, v11, v6 ; D2820006 041A1604 v_mul_f32_e32 v2, v30, v2 ; 1004051E v_mad_f32 v2, v3, v33, v2 ; D2820002 040A4303 v_mad_f32 v2, v4, v9, v2 ; D2820002 040A1304 v_mul_f32_e32 v3, v7, v2 ; 10060507 v_mul_f32_e32 v3, v12, v3 ; 1006070C v_mad_f32 v4, s26, v3, v8 ; D2820004 0422061A v_mad_f32 v8, s34, v3, v13 ; D2820008 04360622 v_mad_f32 v3, s8, v3, v6 ; D2820003 041A0608 v_mul_f32_e32 v6, v5, v5 ; 100C0B05 v_mad_f32 v6, v2, v2, -v6 ; D2820006 841A0502 v_mul_f32_e32 v6, 0x3edbab79, v6 ; 100C0CFF 3EDBAB79 v_mad_f32 v4, s27, v6, v4 ; D2820004 04120C1B v_mad_f32 v8, s35, v6, v8 ; D2820008 04220C23 v_mad_f32 v3, s9, v6, v3 ; D2820003 040E0C09 v_mul_f32_e32 v6, 0x3f82fc67, v7 ; 100C0EFF 3F82FC67 v_mul_f32_e32 v7, s21, v6 ; 100E0C15 v_mov_b32_e32 v9, 0xbf82fc67 ; 7E1202FF BF82FC67 v_mul_f32_e32 v11, v9, v5 ; 10160B09 v_mad_f32 v7, s20, v11, v7 ; D2820007 041E1614 v_mul_f32_e32 v12, s29, v6 ; 10180C1D v_mad_f32 v12, s28, v11, v12 ; D282000C 0432161C v_mul_f32_e32 v6, s37, v6 ; 100C0C25 v_mad_f32 v6, s36, v11, v6 ; D2820006 041A1624 v_mul_f32_e32 v9, v9, v2 ; 10120509 v_mad_f32 v7, s22, v9, v7 ; D2820007 041E1216 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_mul_f32_e32 v2, 0x3f5bab79, v2 ; 100404FF 3F5BAB79 v_mad_f32 v5, s23, v2, v7 ; D2820005 041E0417 v_mov_b32_e32 v7, 0x3f62dfd0 ; 7E0E02FF 3F62DFD0 v_mad_f32 v5, s17, v7, v5 ; D2820005 04160E11 v_add_f32_e32 v4, v5, v4 ; 06080905 v_mad_f32 v5, s30, v9, v12 ; D2820005 0432121E v_mad_f32 v6, s38, v9, v6 ; D2820006 041A1226 v_mad_f32 v5, s31, v2, v5 ; D2820005 0416041F v_mad_f32 v2, s0, v2, v6 ; D2820002 041A0400 v_mad_f32 v2, s19, v7, v2 ; D2820002 040A0E13 v_add_f32_e32 v2, v2, v3 ; 06040702 v_mad_f32 v3, s18, v7, v5 ; D2820003 04160E12 v_add_f32_e32 v3, v3, v8 ; 06061103 v_max_f32_e32 v4, 0, v4 ; 20080880 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_max_f32_e32 v3, 0, v3 ; 20060680 v_mul_f32_e32 v3, v3, v10 ; 10061503 v_max_f32_e32 v2, 0, v2 ; 20040480 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 40 Code Size: 1892 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], FACE, CONSTANT DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[0..13] DCL TEMP[0..1] DCL TEMP[2], LOCAL DCL TEMP[3..5] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, -0.3333} IMM[1] FLT32 { 0.8862, 0.0000, 340282346638528859811704183484516925440.0000, -0.8581} IMM[2] FLT32 { 3.0000, -1.0000, -1.0233, 1.0233} IMM[3] FLT32 { 0.8581, -0.8581, 0.2477, 0.4290} 0: TEX TEMP[0], IN[2], SAMP[2], 2D 1: ADD TEMP[0], TEMP[0].xxxx, IMM[0].wwww 2: KILL_IF TEMP[0] 3: MOV TEMP[0].z, IMM[0].zzzz 4: ADD TEMP[0].xyz, TEMP[0].zzzz, -CONST[13] 5: TEX TEMP[1], IN[2], SAMP[1], 2D 6: MUL TEMP[0].xyz, TEMP[0], TEMP[1] 7: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 8: TEX TEMP[1], IN[2], SAMP[0], 2D 9: MAD TEMP[1].xyz, TEMP[1], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[2].x, TEMP[1], TEMP[1] 11: RSQ TEMP[2].x, TEMP[2].xxxx 12: MIN TEMP[2].x, IMM[1].zzzz, TEMP[2].xxxx 13: MUL TEMP[3].xyz, TEMP[1], TEMP[2].xxxx 14: CMP TEMP[0].w, IN[3].xxxx, IMM[0].zzzz, IMM[0].yyyy 15: MUL TEMP[0].w, TEMP[0].wwww, CONST[0].xxxx 16: MUL TEMP[1].xyz, TEMP[0].wwww, TEMP[3] 17: DP3 TEMP[2].x, IN[1], IN[1] 18: RSQ TEMP[2].x, TEMP[2].xxxx 19: MIN TEMP[2].x, IMM[1].zzzz, TEMP[2].xxxx 20: MUL TEMP[3].xyz, IN[1], TEMP[2].xxxx 21: DP3 TEMP[2].x, IN[0], IN[0] 22: RSQ TEMP[2].x, TEMP[2].xxxx 23: MIN TEMP[2].x, IMM[1].zzzz, TEMP[2].xxxx 24: MUL TEMP[4].xyz, IN[0], TEMP[2].xxxx 25: MUL TEMP[5].xyz, TEMP[3].zxyw, TEMP[4].yzxw 26: MAD TEMP[5].xyz, TEMP[3].yzxw, TEMP[4].zxyw, -TEMP[5] 27: DP3 TEMP[3].y, TEMP[3], TEMP[1] 28: DP3 TEMP[3].z, TEMP[4], TEMP[1] 29: MUL TEMP[4].xyz, TEMP[5], IN[1].wwww 30: DP3 TEMP[3].x, TEMP[4], TEMP[1] 31: MUL TEMP[1].xz, TEMP[3], TEMP[3].yyyy 32: MUL TEMP[4].xyz, TEMP[3], TEMP[3].xyxw 33: MUL TEMP[5].xyz, TEMP[3], IMM[2].zwzw 34: MAD TEMP[1].w, TEMP[3].zzzz, TEMP[3].zzzz, -TEMP[4].xxxx 35: MAD TEMP[1].y, TEMP[4].yyyy, IMM[2].xxxx, IMM[2].yyyy 36: MUL TEMP[5].w, TEMP[4].zzzz, IMM[3].xxxx 37: MUL TEMP[1], TEMP[1], IMM[3].yzyw 38: DP4 TEMP[0].w, CONST[8], TEMP[1] 39: DP4 TEMP[3].x, CONST[7], TEMP[5] 40: MOV TEMP[4].x, IMM[1].xxxx 41: MAD TEMP[3].x, CONST[6].xxxx, TEMP[4].xxxx, TEMP[3].xxxx 42: ADD TEMP[3].x, TEMP[0].wwww, TEMP[3].xxxx 43: DP4 TEMP[0].w, CONST[10], TEMP[1] 44: DP4 TEMP[1].x, CONST[12], TEMP[1] 45: DP4 TEMP[1].y, CONST[9], TEMP[5] 46: DP4 TEMP[1].z, CONST[11], TEMP[5] 47: MAD TEMP[1].z, CONST[6].zzzz, TEMP[4].xxxx, TEMP[1].zzzz 48: ADD TEMP[3].z, TEMP[1].xxxx, TEMP[1].zzzz 49: MAD TEMP[1].x, CONST[6].yyyy, TEMP[4].xxxx, TEMP[1].yyyy 50: ADD TEMP[3].y, TEMP[0].wwww, TEMP[1].xxxx 51: MUL TEMP[0].xyz, TEMP[0], TEMP[3] 52: CMP OUT[0].xyz, TEMP[3], IMM[1].yyyy, TEMP[0] 53: MOV OUT[0].w, IMM[1].yyyy 54: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %59 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %60 = load <32 x i8>, <32 x i8> addrspace(2)* %59, align 32, !tbaa !0 %61 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %84 = bitcast float %82 to i32 %85 = bitcast float %83 to i32 %86 = insertelement <2 x i32> undef, i32 %84, i32 0 %87 = insertelement <2 x i32> %86, i32 %85, i32 1 %88 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %87, <32 x i8> %71, <16 x i8> %74, i32 2) %89 = extractelement <4 x float> %88, i32 0 %90 = fadd float %89, 0xBFD554C980000000 %91 = fadd float %89, 0xBFD554C980000000 %92 = fadd float %89, 0xBFD554C980000000 %93 = fadd float %89, 0xBFD554C980000000 %94 = fcmp olt float %90, 0.000000e+00 %95 = fcmp olt float %91, 0.000000e+00 %96 = fcmp olt float %92, 0.000000e+00 %97 = fcmp olt float %93, 0.000000e+00 %98 = or i1 %97, %96 %99 = or i1 %98, %95 %100 = or i1 %99, %94 %101 = select i1 %100, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %101) %102 = fsub float 1.000000e+00, %56 %103 = fsub float 1.000000e+00, %57 %104 = fsub float 1.000000e+00, %58 %105 = bitcast float %82 to i32 %106 = bitcast float %83 to i32 %107 = insertelement <2 x i32> undef, i32 %105, i32 0 %108 = insertelement <2 x i32> %107, i32 %106, i32 1 %109 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %108, <32 x i8> %65, <16 x i8> %68, i32 2) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = fmul float %102, %110 %114 = fmul float %103, %111 %115 = fmul float %104, %112 %116 = fmul float %113, %28 %117 = fadd float %116, %25 %118 = fmul float %114, %28 %119 = fadd float %118, %26 %120 = fmul float %115, %28 %121 = fadd float %120, %27 %122 = bitcast float %82 to i32 %123 = bitcast float %83 to i32 %124 = insertelement <2 x i32> undef, i32 %122, i32 0 %125 = insertelement <2 x i32> %124, i32 %123, i32 1 %126 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %125, <32 x i8> %60, <16 x i8> %62, i32 2) %127 = extractelement <4 x float> %126, i32 0 %128 = extractelement <4 x float> %126, i32 1 %129 = extractelement <4 x float> %126, i32 2 %130 = fmul float %127, 2.000000e+00 %131 = fadd float %130, -1.000000e+00 %132 = fmul float %128, 2.000000e+00 %133 = fadd float %132, -1.000000e+00 %134 = fmul float %129, 2.000000e+00 %135 = fadd float %134, -1.000000e+00 %136 = fmul float %131, %131 %137 = fmul float %133, %133 %138 = fadd float %137, %136 %139 = fmul float %135, %135 %140 = fadd float %138, %139 %141 = call float @llvm.AMDGPU.rsq.clamped.f32(float %140) %142 = call float @llvm.minnum.f32(float %141, float 0x47EFFFFFE0000000) %143 = fmul float %131, %142 %144 = fmul float %133, %142 %145 = fmul float %135, %142 %146 = call float @llvm.AMDGPU.cndlt(float %18, float 1.000000e+00, float -1.000000e+00) %147 = fmul float %146, %24 %148 = fmul float %147, %143 %149 = fmul float %147, %144 %150 = fmul float %147, %145 %151 = fmul float %78, %78 %152 = fmul float %79, %79 %153 = fadd float %152, %151 %154 = fmul float %80, %80 %155 = fadd float %153, %154 %156 = call float @llvm.AMDGPU.rsq.clamped.f32(float %155) %157 = call float @llvm.minnum.f32(float %156, float 0x47EFFFFFE0000000) %158 = fmul float %78, %157 %159 = fmul float %79, %157 %160 = fmul float %80, %157 %161 = fmul float %75, %75 %162 = fmul float %76, %76 %163 = fadd float %162, %161 %164 = fmul float %77, %77 %165 = fadd float %163, %164 %166 = call float @llvm.AMDGPU.rsq.clamped.f32(float %165) %167 = call float @llvm.minnum.f32(float %166, float 0x47EFFFFFE0000000) %168 = fmul float %75, %167 %169 = fmul float %76, %167 %170 = fmul float %77, %167 %171 = fmul float %160, %169 %172 = fmul float %158, %170 %173 = fmul float %159, %168 %174 = fmul float %159, %170 %175 = fsub float %174, %171 %176 = fmul float %160, %168 %177 = fsub float %176, %172 %178 = fmul float %158, %169 %179 = fsub float %178, %173 %180 = fmul float %158, %148 %181 = fmul float %159, %149 %182 = fadd float %181, %180 %183 = fmul float %160, %150 %184 = fadd float %182, %183 %185 = fmul float %168, %148 %186 = fmul float %169, %149 %187 = fadd float %186, %185 %188 = fmul float %170, %150 %189 = fadd float %187, %188 %190 = fmul float %175, %81 %191 = fmul float %177, %81 %192 = fmul float %179, %81 %193 = fmul float %190, %148 %194 = fmul float %191, %149 %195 = fadd float %194, %193 %196 = fmul float %192, %150 %197 = fadd float %195, %196 %198 = fmul float %197, %184 %199 = fmul float %189, %184 %200 = fmul float %197, %197 %201 = fmul float %184, %184 %202 = fmul float %189, %197 %203 = fmul float %197, 0xBFF05F8CE0000000 %204 = fmul float %184, 0x3FF05F8CE0000000 %205 = fmul float %189, 0xBFF05F8CE0000000 %206 = fmul float %189, %189 %207 = fsub float %206, %200 %208 = fmul float %201, 3.000000e+00 %209 = fadd float %208, -1.000000e+00 %210 = fmul float %202, 0x3FEB756F20000000 %211 = fmul float %198, 0xBFEB756F20000000 %212 = fmul float %209, 0x3FCFB4E7C0000000 %213 = fmul float %199, 0xBFEB756F20000000 %214 = fmul float %207, 0x3FDB756F20000000 %215 = fmul float %36, %211 %216 = fmul float %37, %212 %217 = fadd float %215, %216 %218 = fmul float %38, %213 %219 = fadd float %217, %218 %220 = fmul float %39, %214 %221 = fadd float %219, %220 %222 = fmul float %32, %203 %223 = fmul float %33, %204 %224 = fadd float %222, %223 %225 = fmul float %34, %205 %226 = fadd float %224, %225 %227 = fmul float %35, %210 %228 = fadd float %226, %227 %229 = fmul float %29, 0x3FEC5BFA00000000 %230 = fadd float %229, %228 %231 = fadd float %221, %230 %232 = fmul float %44, %211 %233 = fmul float %45, %212 %234 = fadd float %232, %233 %235 = fmul float %46, %213 %236 = fadd float %234, %235 %237 = fmul float %47, %214 %238 = fadd float %236, %237 %239 = fmul float %52, %211 %240 = fmul float %53, %212 %241 = fadd float %239, %240 %242 = fmul float %54, %213 %243 = fadd float %241, %242 %244 = fmul float %55, %214 %245 = fadd float %243, %244 %246 = fmul float %40, %203 %247 = fmul float %41, %204 %248 = fadd float %246, %247 %249 = fmul float %42, %205 %250 = fadd float %248, %249 %251 = fmul float %43, %210 %252 = fadd float %250, %251 %253 = fmul float %48, %203 %254 = fmul float %49, %204 %255 = fadd float %253, %254 %256 = fmul float %50, %205 %257 = fadd float %255, %256 %258 = fmul float %51, %210 %259 = fadd float %257, %258 %260 = fmul float %31, 0x3FEC5BFA00000000 %261 = fadd float %260, %259 %262 = fadd float %245, %261 %263 = fmul float %30, 0x3FEC5BFA00000000 %264 = fadd float %263, %252 %265 = fadd float %238, %264 %266 = fmul float %117, %231 %267 = fmul float %119, %265 %268 = fmul float %121, %262 %269 = call float @llvm.AMDGPU.cndlt(float %231, float 0.000000e+00, float %266) %270 = call float @llvm.AMDGPU.cndlt(float %265, float 0.000000e+00, float %267) %271 = call float @llvm.AMDGPU.cndlt(float %262, float 0.000000e+00, float %268) %272 = call i32 @llvm.SI.packf16(float %269, float %270) %273 = bitcast i32 %272 to float %274 = call i32 @llvm.SI.packf16(float %271, float 0.000000e+00) %275 = bitcast i32 %274 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %273, float %275, float %273, float %275) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[12:19], s[8:11] ; F0800100 0043000A v_mov_b32_e32 v1, 0xbeaaa64c ; 7E0202FF BEAAA64C s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v0, v1 ; 06000300 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cndmask_b32_e64 v0, v0, -1.0, vcc ; D2000000 01A9E700 v_cndmask_b32_e64 v0, v0, -1.0, vcc ; D2000000 01A9E700 v_cndmask_b32_e64 v0, v0, -1.0, vcc ; D2000000 01A9E700 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[16:23], s[12:15] ; F0800700 00640C0A image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[24:31], s[8:11] ; F0800700 00460F0A s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111 s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s9, s[0:3], 0x18 ; C2048118 s_buffer_load_dword s10, s[0:3], 0x19 ; C2050119 s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D s_buffer_load_dword s14, s[0:3], 0x1e ; C207011E s_buffer_load_dword s15, s[0:3], 0x1f ; C207811F s_buffer_load_dword s16, s[0:3], 0x20 ; C2080120 s_buffer_load_dword s17, s[0:3], 0x21 ; C2088121 s_buffer_load_dword s18, s[0:3], 0x22 ; C2090122 s_buffer_load_dword s19, s[0:3], 0x23 ; C2098123 s_buffer_load_dword s20, s[0:3], 0x24 ; C20A0124 s_buffer_load_dword s21, s[0:3], 0x25 ; C20A8125 s_buffer_load_dword s22, s[0:3], 0x26 ; C20B0126 s_buffer_load_dword s23, s[0:3], 0x27 ; C20B8127 s_buffer_load_dword s24, s[0:3], 0x32 ; C20C0132 s_buffer_load_dword s25, s[0:3], 0x33 ; C20C8133 s_buffer_load_dword s26, s[0:3], 0x34 ; C20D0134 s_buffer_load_dword s27, s[0:3], 0x35 ; C20D8135 s_buffer_load_dword s28, s[0:3], 0x36 ; C20E0136 s_buffer_load_dword s29, s[0:3], 0x28 ; C20E8128 s_buffer_load_dword s30, s[0:3], 0x29 ; C20F0129 s_buffer_load_dword s31, s[0:3], 0x2a ; C20F812A s_buffer_load_dword s32, s[0:3], 0x2b ; C210012B s_buffer_load_dword s33, s[0:3], 0x2c ; C210812C s_buffer_load_dword s34, s[0:3], 0x2d ; C211012D s_buffer_load_dword s35, s[0:3], 0x2e ; C211812E s_buffer_load_dword s36, s[0:3], 0x2f ; C212012F s_buffer_load_dword s37, s[0:3], 0x30 ; C2128130 s_buffer_load_dword s0, s[0:3], 0x31 ; C2000131 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e64 v0, 1.0, s26 ; D2080000 000034F2 v_sub_f32_e64 v1, 1.0, s27 ; D2080001 000036F2 v_sub_f32_e64 v10, 1.0, s28 ; D208000A 000038F2 v_mul_f32_e32 v0, v12, v0 ; 1000010C v_mul_f32_e32 v1, v13, v1 ; 1002030D v_mul_f32_e32 v10, v14, v10 ; 1014150E v_mov_b32_e32 v11, s5 ; 7E160205 v_mad_f32 v0, s8, v0, v11 ; D2820000 042E0008 v_mov_b32_e32 v11, s6 ; 7E160206 v_mad_f32 v1, s8, v1, v11 ; D2820001 042E0208 v_mov_b32_e32 v11, s7 ; 7E160207 v_mad_f32 v10, s8, v10, v11 ; D282000A 042E1408 v_mad_f32 v11, 2.0, v15, -1.0 ; D282000B 03CE1EF4 v_mad_f32 v12, 2.0, v16, -1.0 ; D282000C 03CE20F4 v_mad_f32 v13, 2.0, v17, -1.0 ; D282000D 03CE22F4 v_mul_f32_e32 v14, v11, v11 ; 101C170B v_mad_f32 v14, v12, v12, v14 ; D282000E 043A190C v_mad_f32 v14, v13, v13, v14 ; D282000E 043A1B0D v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, -1.0, 1.0, vcc ; D2000002 01A9E4F3 v_mul_f32_e32 v2, s4, v2 ; 10040404 v_min_f32_e32 v14, 0x7f7fffff, v14 ; 1E1C1CFF 7F7FFFFF v_mul_f32_e32 v11, v14, v11 ; 1016170E v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mul_f32_e32 v13, v14, v13 ; 101A1B0E v_mul_f32_e32 v14, v6, v6 ; 101C0D06 v_mad_f32 v14, v7, v7, v14 ; D282000E 043A0F07 v_mad_f32 v14, v8, v8, v14 ; D282000E 043A1108 v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E v_mul_f32_e32 v11, v11, v2 ; 1016050B v_mul_f32_e32 v12, v12, v2 ; 1018050C v_mul_f32_e32 v2, v13, v2 ; 1004050D v_min_f32_e32 v13, 0x7f7fffff, v14 ; 1E1A1CFF 7F7FFFFF v_mul_f32_e32 v14, v3, v3 ; 101C0703 v_mad_f32 v14, v4, v4, v14 ; D282000E 043A0904 v_mad_f32 v14, v5, v5, v14 ; D282000E 043A0B05 v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E v_mul_f32_e32 v6, v13, v6 ; 100C0D0D v_mul_f32_e32 v7, v13, v7 ; 100E0F0D v_mul_f32_e32 v8, v13, v8 ; 1010110D v_min_f32_e32 v13, 0x7f7fffff, v14 ; 1E1A1CFF 7F7FFFFF v_mul_f32_e32 v3, v13, v3 ; 1006070D v_mul_f32_e32 v4, v13, v4 ; 1008090D v_mul_f32_e32 v5, v13, v5 ; 100A0B0D v_mul_f32_e32 v13, v4, v8 ; 101A1104 v_mad_f32 v13, v7, v5, -v13 ; D282000D 84360B07 v_mul_f32_e32 v14, v11, v6 ; 101C0D0B v_mad_f32 v14, v7, v12, v14 ; D282000E 043A1907 v_mul_f32_e32 v7, v3, v7 ; 100E0F03 v_mad_f32 v7, v6, v4, -v7 ; D2820007 841E0906 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mad_f32 v6, v8, v3, -v6 ; D2820006 841A0708 v_mad_f32 v8, v8, v2, v14 ; D2820008 043A0508 v_mul_f32_e32 v13, v9, v13 ; 101A1B09 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mul_f32_e32 v9, v11, v13 ; 10121B0B v_mad_f32 v6, v6, v12, v9 ; D2820006 04261906 v_mad_f32 v6, v7, v2, v6 ; D2820006 041A0507 v_mul_f32_e32 v7, v8, v8 ; 100E1108 v_mov_b32_e32 v9, 0x40400000 ; 7E1202FF 40400000 v_mad_f32 v7, v9, v7, -1.0 ; D2820007 03CE0F09 v_mul_f32_e32 v7, 0x3e7da73e, v7 ; 100E0EFF 3E7DA73E v_mul_f32_e32 v9, s17, v7 ; 10120E11 v_mul_f32_e32 v13, v8, v6 ; 101A0D08 v_mov_b32_e32 v14, 0xbf5bab79 ; 7E1C02FF BF5BAB79 v_mul_f32_e32 v13, v14, v13 ; 101A1B0E v_mad_f32 v9, s16, v13, v9 ; D2820009 04261A10 v_mul_f32_e32 v15, s30, v7 ; 101E0E1E v_mad_f32 v15, s29, v13, v15 ; D282000F 043E1A1D v_mul_f32_e32 v7, s0, v7 ; 100E0E00 v_mad_f32 v7, s37, v13, v7 ; D2820007 041E1A25 v_mul_f32_e32 v3, v11, v3 ; 1006070B v_mad_f32 v3, v4, v12, v3 ; D2820003 040E1904 v_mad_f32 v2, v5, v2, v3 ; D2820002 040E0505 v_mul_f32_e32 v3, v8, v2 ; 10060508 v_mul_f32_e32 v3, v14, v3 ; 1006070E v_mad_f32 v4, s18, v3, v9 ; D2820004 04260612 v_mad_f32 v5, s31, v3, v15 ; D2820005 043E061F v_mad_f32 v3, s24, v3, v7 ; D2820003 041E0618 v_mul_f32_e32 v7, v6, v6 ; 100E0D06 v_mad_f32 v7, v2, v2, -v7 ; D2820007 841E0502 v_mul_f32_e32 v7, 0x3edbab79, v7 ; 100E0EFF 3EDBAB79 v_mad_f32 v4, s19, v7, v4 ; D2820004 04120E13 v_mad_f32 v5, s32, v7, v5 ; D2820005 04160E20 v_mad_f32 v3, s25, v7, v3 ; D2820003 040E0E19 v_mul_f32_e32 v7, 0x3f82fc67, v8 ; 100E10FF 3F82FC67 v_mul_f32_e32 v8, s13, v7 ; 10100E0D v_mov_b32_e32 v9, 0xbf82fc67 ; 7E1202FF BF82FC67 v_mul_f32_e32 v11, v9, v6 ; 10160D09 v_mad_f32 v8, s12, v11, v8 ; D2820008 0422160C v_mul_f32_e32 v9, v9, v2 ; 10120509 v_mad_f32 v8, s14, v9, v8 ; D2820008 0422120E v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v2, 0x3f5bab79, v2 ; 100404FF 3F5BAB79 v_mad_f32 v6, s15, v2, v8 ; D2820006 0422040F v_mov_b32_e32 v8, 0x3f62dfd0 ; 7E1002FF 3F62DFD0 v_mad_f32 v6, s9, v8, v6 ; D2820006 041A1009 v_mul_f32_e32 v12, s21, v7 ; 10180E15 v_mad_f32 v12, s20, v11, v12 ; D282000C 04321614 v_mad_f32 v12, s22, v9, v12 ; D282000C 04321216 v_mad_f32 v12, s23, v2, v12 ; D282000C 04320417 v_mul_f32_e32 v7, s34, v7 ; 100E0E22 v_mad_f32 v7, s33, v11, v7 ; D2820007 041E1621 v_mad_f32 v7, s35, v9, v7 ; D2820007 041E1223 v_mad_f32 v2, s36, v2, v7 ; D2820002 041E0424 v_mad_f32 v2, s11, v8, v2 ; D2820002 040A100B v_mad_f32 v7, s10, v8, v12 ; D2820007 0432100A v_add_f32_e32 v4, v6, v4 ; 06080906 v_add_f32_e32 v2, v2, v3 ; 06040702 v_add_f32_e32 v3, v7, v5 ; 06060B07 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v5, v2, v10 ; 100A1502 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, v5, 0, vcc ; D2000002 01A90105 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e64 v1, v2, 0 ; D25E0001 00010102 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 20 Code Size: 1096 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..9] DCL TEMP[0], LOCAL DCL TEMP[1..5] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.8165, 0.5774, 0.0000, 0.0039} IMM[2] FLT32 { -0.7071, -0.4082, 0.5774, 0.7071} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: TEX TEMP[2], IN[1].wzzw, SAMP[2], 2D 5: MAD TEMP[2].xyz, TEMP[2], IMM[0].xxxx, IMM[0].yyyy 6: DP3 TEMP[0].x, TEMP[2], TEMP[2] 7: RSQ TEMP[0].x, TEMP[0].xxxx 8: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 9: MUL TEMP[3].xyz, TEMP[2], TEMP[0].xxxx 10: DP3 TEMP[1].w, TEMP[3], TEMP[1] 11: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[3] 12: MAD TEMP[1].xyz, TEMP[2], IMM[0].xxxx, -TEMP[1] 13: DP2 TEMP[0].x, TEMP[1].yzzw, IMM[1] 14: ADD_SAT TEMP[2].x, IMM[1].zzzz, TEMP[0].xxxx 15: DP3_SAT TEMP[2].y, TEMP[1], IMM[2] 16: DP3_SAT TEMP[2].z, TEMP[1].yzxw, IMM[2].yzww 17: MAX TEMP[1].xyz, TEMP[2], IMM[0].wwww 18: LG2 TEMP[0].x, |TEMP[1].xxxx| 19: MAX TEMP[2].x, IMM[3].yyyy, TEMP[0].xxxx 20: LG2 TEMP[0].x, |TEMP[1].yyyy| 21: MAX TEMP[2].y, IMM[3].yyyy, TEMP[0].xxxx 22: LG2 TEMP[0].x, |TEMP[1].zzzz| 23: MAX TEMP[2].z, IMM[3].yyyy, TEMP[0].xxxx 24: MOV TEMP[1].z, IMM[0].zzzz 25: ADD TEMP[1].x, TEMP[1].zzzz, CONST[8].xxxx 26: MUL TEMP[1].xyw, TEMP[2].xyzz, TEMP[1].xxxx 27: EX2 TEMP[2].x, TEMP[1].xxxx 28: EX2 TEMP[2].y, TEMP[1].yyyy 29: EX2 TEMP[2].z, TEMP[1].wwww 30: TEX TEMP[4], IN[0], SAMP[1], 2D 31: MUL TEMP[1].xyw, TEMP[4].xyzz, CONST[7].xyzz 32: DP3 TEMP[2].x, TEMP[1].xyww, TEMP[2] 33: TEX TEMP[4], IN[1], SAMP[4], 2D 34: MAD TEMP[2].yzw, TEMP[4].xxyz, CONST[5].wwww, CONST[5].xxyz 35: TEX TEMP[4], IN[0], SAMP[0], 2D 36: MUL TEMP[4].xyz, TEMP[4], CONST[6] 37: MUL TEMP[2].yzw, TEMP[2], TEMP[4].xxyz 38: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[2].yzww 39: DP2 TEMP[0].x, TEMP[3].yzzw, IMM[1] 40: ADD_SAT TEMP[5].x, IMM[1].zzzz, TEMP[0].xxxx 41: DP3_SAT TEMP[5].y, TEMP[3], IMM[2] 42: DP3_SAT TEMP[5].z, TEMP[3].yzxw, IMM[2].yzww 43: MUL TEMP[3].xyz, TEMP[5], TEMP[5] 44: MAX TEMP[5].xyz, TEMP[3], IMM[0].wwww 45: DP3 TEMP[1].x, TEMP[1].xyww, TEMP[5] 46: ADD TEMP[1].yzw, TEMP[1].zzzz, -CONST[0].xxyz 47: TEX TEMP[3], IN[1], SAMP[3], 2D 48: MUL TEMP[1].yzw, TEMP[1], TEMP[3].xxyz 49: MAD TEMP[1].yzw, TEMP[1], CONST[4].wwww, CONST[4].xxyz 50: MUL TEMP[3].xyz, TEMP[4], TEMP[1].yzww 51: MAD TEMP[2].xyz, TEMP[3], TEMP[1].xxxx, TEMP[2] 52: ADD TEMP[2].xyz, TEMP[2], CONST[0] 53: MAD OUT[0].xyz, TEMP[1].yzww, CONST[9], TEMP[2] 54: MUL OUT[0].w, IMM[1].wwww, IN[2].wwww 55: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %45 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %46 = load <32 x i8>, <32 x i8> addrspace(2)* %45, align 32, !tbaa !0 %47 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %50 = bitcast <8 x i32> addrspace(2)* %49 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %53 = bitcast <4 x i32> addrspace(2)* %52 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %83 = fmul float %80, %80 %84 = fmul float %81, %81 %85 = fadd float %84, %83 %86 = fmul float %82, %82 %87 = fadd float %85, %86 %88 = call float @llvm.AMDGPU.rsq.clamped.f32(float %87) %89 = call float @llvm.minnum.f32(float %88, float 0x47EFFFFFE0000000) %90 = fmul float %80, %89 %91 = fmul float %81, %89 %92 = fmul float %82, %89 %93 = bitcast float %78 to i32 %94 = bitcast float %77 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %57, <16 x i8> %60, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = fmul float %98, 2.000000e+00 %102 = fadd float %101, -1.000000e+00 %103 = fmul float %99, 2.000000e+00 %104 = fadd float %103, -1.000000e+00 %105 = fmul float %100, 2.000000e+00 %106 = fadd float %105, -1.000000e+00 %107 = fmul float %102, %102 %108 = fmul float %104, %104 %109 = fadd float %108, %107 %110 = fmul float %106, %106 %111 = fadd float %109, %110 %112 = call float @llvm.AMDGPU.rsq.clamped.f32(float %111) %113 = call float @llvm.minnum.f32(float %112, float 0x47EFFFFFE0000000) %114 = fmul float %102, %113 %115 = fmul float %104, %113 %116 = fmul float %106, %113 %117 = fmul float %114, %90 %118 = fmul float %115, %91 %119 = fadd float %118, %117 %120 = fmul float %116, %92 %121 = fadd float %119, %120 %122 = fmul float %121, %114 %123 = fmul float %121, %115 %124 = fmul float %121, %116 %125 = fmul float %122, 2.000000e+00 %126 = fsub float %125, %90 %127 = fmul float %123, 2.000000e+00 %128 = fsub float %127, %91 %129 = fmul float %124, 2.000000e+00 %130 = fsub float %129, %92 %131 = fmul float %128, 0x3FEA20BD80000000 %132 = fmul float %130, 0x3FE279A740000000 %133 = fadd float %131, %132 %134 = fadd float %133, 0.000000e+00 %135 = call float @llvm.AMDIL.clamp.(float %134, float 0.000000e+00, float 1.000000e+00) %136 = fmul float %126, 0xBFE6A09E60000000 %137 = fmul float %128, 0xBFDA20BD80000000 %138 = fadd float %137, %136 %139 = fmul float %130, 0x3FE279A740000000 %140 = fadd float %138, %139 %141 = call float @llvm.AMDIL.clamp.(float %140, float 0.000000e+00, float 1.000000e+00) %142 = fmul float %128, 0xBFDA20BD80000000 %143 = fmul float %130, 0x3FE279A740000000 %144 = fadd float %143, %142 %145 = fmul float %126, 0x3FE6A09E60000000 %146 = fadd float %144, %145 %147 = call float @llvm.AMDIL.clamp.(float %146, float 0.000000e+00, float 1.000000e+00) %148 = call float @llvm.maxnum.f32(float %135, float 0x3EB0C6F7A0000000) %149 = call float @llvm.maxnum.f32(float %141, float 0x3EB0C6F7A0000000) %150 = call float @llvm.maxnum.f32(float %147, float 0x3EB0C6F7A0000000) %151 = call float @fabs(float %148) %152 = call float @llvm.log2.f32(float %151) %153 = call float @llvm.maxnum.f32(float %152, float 0xC7EFFFFFE0000000) %154 = call float @fabs(float %149) %155 = call float @llvm.log2.f32(float %154) %156 = call float @llvm.maxnum.f32(float %155, float 0xC7EFFFFFE0000000) %157 = call float @fabs(float %150) %158 = call float @llvm.log2.f32(float %157) %159 = call float @llvm.maxnum.f32(float %158, float 0xC7EFFFFFE0000000) %160 = fadd float %41, 1.000000e+00 %161 = fmul float %153, %160 %162 = fmul float %156, %160 %163 = fmul float %159, %160 %164 = call float @llvm.AMDIL.exp.(float %161) %165 = call float @llvm.AMDIL.exp.(float %162) %166 = call float @llvm.AMDIL.exp.(float %163) %167 = bitcast float %73 to i32 %168 = bitcast float %74 to i32 %169 = insertelement <2 x i32> undef, i32 %167, i32 0 %170 = insertelement <2 x i32> %169, i32 %168, i32 1 %171 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %170, <32 x i8> %51, <16 x i8> %54, i32 2) %172 = extractelement <4 x float> %171, i32 0 %173 = extractelement <4 x float> %171, i32 1 %174 = extractelement <4 x float> %171, i32 2 %175 = fmul float %172, %38 %176 = fmul float %173, %39 %177 = fmul float %174, %40 %178 = fmul float %175, %164 %179 = fmul float %176, %165 %180 = fadd float %179, %178 %181 = fmul float %177, %166 %182 = fadd float %180, %181 %183 = bitcast float %75 to i32 %184 = bitcast float %76 to i32 %185 = insertelement <2 x i32> undef, i32 %183, i32 0 %186 = insertelement <2 x i32> %185, i32 %184, i32 1 %187 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %186, <32 x i8> %69, <16 x i8> %72, i32 2) %188 = extractelement <4 x float> %187, i32 0 %189 = extractelement <4 x float> %187, i32 1 %190 = extractelement <4 x float> %187, i32 2 %191 = fmul float %188, %34 %192 = fadd float %191, %31 %193 = fmul float %189, %34 %194 = fadd float %193, %32 %195 = fmul float %190, %34 %196 = fadd float %195, %33 %197 = bitcast float %73 to i32 %198 = bitcast float %74 to i32 %199 = insertelement <2 x i32> undef, i32 %197, i32 0 %200 = insertelement <2 x i32> %199, i32 %198, i32 1 %201 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %200, <32 x i8> %46, <16 x i8> %48, i32 2) %202 = extractelement <4 x float> %201, i32 0 %203 = extractelement <4 x float> %201, i32 1 %204 = extractelement <4 x float> %201, i32 2 %205 = fmul float %202, %35 %206 = fmul float %203, %36 %207 = fmul float %204, %37 %208 = fmul float %192, %205 %209 = fmul float %194, %206 %210 = fmul float %196, %207 %211 = fmul float %182, %208 %212 = fmul float %182, %209 %213 = fmul float %182, %210 %214 = fmul float %115, 0x3FEA20BD80000000 %215 = fmul float %116, 0x3FE279A740000000 %216 = fadd float %214, %215 %217 = fadd float %216, 0.000000e+00 %218 = call float @llvm.AMDIL.clamp.(float %217, float 0.000000e+00, float 1.000000e+00) %219 = fmul float %114, 0xBFE6A09E60000000 %220 = fmul float %115, 0xBFDA20BD80000000 %221 = fadd float %220, %219 %222 = fmul float %116, 0x3FE279A740000000 %223 = fadd float %221, %222 %224 = call float @llvm.AMDIL.clamp.(float %223, float 0.000000e+00, float 1.000000e+00) %225 = fmul float %115, 0xBFDA20BD80000000 %226 = fmul float %116, 0x3FE279A740000000 %227 = fadd float %226, %225 %228 = fmul float %114, 0x3FE6A09E60000000 %229 = fadd float %227, %228 %230 = call float @llvm.AMDIL.clamp.(float %229, float 0.000000e+00, float 1.000000e+00) %231 = fmul float %218, %218 %232 = fmul float %224, %224 %233 = fmul float %230, %230 %234 = call float @llvm.maxnum.f32(float %231, float 0x3EB0C6F7A0000000) %235 = call float @llvm.maxnum.f32(float %232, float 0x3EB0C6F7A0000000) %236 = call float @llvm.maxnum.f32(float %233, float 0x3EB0C6F7A0000000) %237 = fmul float %175, %234 %238 = fmul float %176, %235 %239 = fadd float %238, %237 %240 = fmul float %177, %236 %241 = fadd float %239, %240 %242 = fsub float 1.000000e+00, %24 %243 = fsub float 1.000000e+00, %25 %244 = fsub float 1.000000e+00, %26 %245 = bitcast float %75 to i32 %246 = bitcast float %76 to i32 %247 = insertelement <2 x i32> undef, i32 %245, i32 0 %248 = insertelement <2 x i32> %247, i32 %246, i32 1 %249 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %248, <32 x i8> %63, <16 x i8> %66, i32 2) %250 = extractelement <4 x float> %249, i32 0 %251 = extractelement <4 x float> %249, i32 1 %252 = extractelement <4 x float> %249, i32 2 %253 = fmul float %242, %250 %254 = fmul float %243, %251 %255 = fmul float %244, %252 %256 = fmul float %253, %30 %257 = fadd float %256, %27 %258 = fmul float %254, %30 %259 = fadd float %258, %28 %260 = fmul float %255, %30 %261 = fadd float %260, %29 %262 = fmul float %205, %257 %263 = fmul float %206, %259 %264 = fmul float %207, %261 %265 = fmul float %262, %241 %266 = fadd float %265, %211 %267 = fmul float %263, %241 %268 = fadd float %267, %212 %269 = fmul float %264, %241 %270 = fadd float %269, %213 %271 = fadd float %266, %24 %272 = fadd float %268, %25 %273 = fadd float %270, %26 %274 = fmul float %257, %42 %275 = fadd float %274, %271 %276 = fmul float %259, %43 %277 = fadd float %276, %272 %278 = fmul float %261, %44 %279 = fadd float %278, %273 %280 = fmul float %79, 3.906250e-03 %281 = call i32 @llvm.SI.packf16(float %275, float %277) %282 = bitcast i32 %281 to float %283 = call i32 @llvm.SI.packf16(float %279, float %280) %284 = bitcast i32 %283 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %282, float %284, float %282, float %284) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x3f13cd3a ; 7E0402FF 3F13CD3A v_mov_b32_e32 v3, 0x3f5105ec ; 7E0602FF 3F5105EC v_mov_b32_e32 v4, 0xbf3504f3 ; 7E0802FF BF3504F3 v_mov_b32_e32 v5, 0xbed105ec ; 7E0A02FF BED105EC v_mov_b32_e32 v6, 0x3f3504f3 ; 7E0C02FF 3F3504F3 v_mov_b32_e32 v7, 0xff7fffff ; 7E0E02FF FF7FFFFF v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 v_interp_p1_f32 v10, v0, 0, 1, [m0] ; C8280400 v_interp_p2_f32 v10, [v10], v1, 0, 1, [m0] ; C8290401 v_interp_p1_f32 v11, v0, 1, 1, [m0] ; C82C0500 v_interp_p2_f32 v11, [v11], v1, 1, 1, [m0] ; C82D0501 v_interp_p1_f32 v13, v0, 2, 1, [m0] ; C8340600 v_interp_p2_f32 v13, [v13], v1, 2, 1, [m0] ; C8350601 v_interp_p1_f32 v12, v0, 3, 1, [m0] ; C8300700 v_interp_p2_f32 v12, [v12], v1, 3, 1, [m0] ; C8310701 v_interp_p1_f32 v14, v0, 3, 2, [m0] ; C8380B00 v_interp_p2_f32 v14, [v14], v1, 3, 2, [m0] ; C8390B01 v_mul_f32_e32 v14, 0x3b800000, v14 ; 101C1CFF 3B800000 v_interp_p1_f32 v15, v0, 0, 3, [m0] ; C83C0C00 v_interp_p2_f32 v15, [v15], v1, 0, 3, [m0] ; C83D0C01 v_interp_p1_f32 v16, v0, 1, 3, [m0] ; C8400D00 v_interp_p2_f32 v16, [v16], v1, 1, 3, [m0] ; C8410D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[52:55], s[4:5], 0x4 ; C09A0504 s_load_dwordx4 s[56:59], s[4:5], 0x8 ; C09C0508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[40:43], s[4:5], 0x10 ; C0940510 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx8 s[60:67], s[6:7], 0x8 ; C0DE0708 s_load_dwordx8 s[68:75], s[6:7], 0x10 ; C0E20710 s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[12:15], 0x0 ; C2010D00 s_buffer_load_dword s1, s[12:15], 0x1 ; C2008D01 s_buffer_load_dword s0, s[12:15], 0x2 ; C2000D02 s_buffer_load_dword s8, s[12:15], 0x10 ; C2040D10 s_buffer_load_dword s5, s[12:15], 0x11 ; C2028D11 s_buffer_load_dword s10, s[12:15], 0x17 ; C2050D17 s_buffer_load_dword s11, s[12:15], 0x18 ; C2058D18 s_buffer_load_dword s76, s[12:15], 0x19 ; C2260D19 s_buffer_load_dword s77, s[12:15], 0x1a ; C2268D1A s_buffer_load_dword s78, s[12:15], 0x1c ; C2270D1C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v1, 1.0, s2 ; D2080001 000004F2 v_sub_f32_e64 v17, 1.0, s1 ; D2080011 000002F2 v_sub_f32_e64 v18, 1.0, s0 ; D2080012 000000F2 s_buffer_load_dword s79, s[12:15], 0x1d ; C2278D1D s_buffer_load_dword s80, s[12:15], 0x1e ; C2280D1E s_buffer_load_dword s9, s[12:15], 0x20 ; C2048D20 s_buffer_load_dword s4, s[12:15], 0x24 ; C2020D24 s_buffer_load_dword s3, s[12:15], 0x25 ; C2018D25 s_buffer_load_dword s81, s[12:15], 0x14 ; C2288D14 s_buffer_load_dword s82, s[12:15], 0x15 ; C2290D15 s_buffer_load_dword s83, s[12:15], 0x16 ; C2298D16 s_load_dwordx8 s[44:51], s[6:7], 0x20 ; C0D60720 image_sample v[19:21], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[68:75], s[56:59] ; F0800700 01D1130C image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[60:67], s[52:55] ; F0800700 01AF1608 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mad_f32 v12, 2.0, v19, -1.0 ; D282000C 03CE26F4 v_mov_b32_e32 v13, s81 ; 7E1A0251 v_mov_b32_e32 v19, s82 ; 7E260252 v_mov_b32_e32 v25, s83 ; 7E320253 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[44:51], s[40:43] ; F0800700 014B1A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v13, s10, v26, v13 ; D282000D 0436340A v_mad_f32 v19, s10, v27, v19 ; D2820013 044E360A v_mad_f32 v25, s10, v28, v25 ; D2820019 0466380A image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[32:39], s[28:31] ; F0800700 00E81A08 image_sample v[8:10], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[20:27], s[16:19] ; F0800700 0085080A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v8, v9, v17 ; 10102309 v_mul_f32_e32 v9, v10, v18 ; 1012250A v_mad_f32 v10, 2.0, v20, -1.0 ; D282000A 03CE28F4 v_mad_f32 v11, 2.0, v21, -1.0 ; D282000B 03CE2AF4 v_mul_f32_e32 v17, s78, v22 ; 10222C4E v_mul_f32_e32 v18, s79, v23 ; 10242E4F v_mul_f32_e32 v20, s80, v24 ; 10283050 v_mul_f32_e32 v21, s11, v26 ; 102A340B v_mul_f32_e32 v22, s76, v27 ; 102C364C v_mul_f32_e32 v23, s77, v28 ; 102E384D v_mul_f32_e32 v24, v12, v12 ; 1030190C v_mad_f32 v24, v10, v10, v24 ; D2820018 0462150A v_mad_f32 v24, v11, v11, v24 ; D2820018 0462170B v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v26, v15, v15 ; 10341F0F v_mad_f32 v26, v16, v16, v26 ; D282001A 046A2110 v_mad_f32 v26, v0, v0, v26 ; D282001A 046A0100 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_min_f32_e32 v24, 0x7f7fffff, v24 ; 1E3030FF 7F7FFFFF v_mul_f32_e32 v12, v24, v12 ; 10181918 v_mul_f32_e32 v10, v24, v10 ; 10141518 v_mul_f32_e32 v11, v24, v11 ; 10161718 v_min_f32_e32 v24, 0x7f7fffff, v26 ; 1E3034FF 7F7FFFFF v_mul_f32_e32 v26, v24, v15 ; 10341F18 v_mul_f32_e32 v26, v26, v12 ; 1034191A v_mul_f32_e32 v27, v24, v16 ; 10362118 v_mad_f32 v26, v10, v27, v26 ; D282001A 046A370A v_mul_f32_e32 v27, v24, v0 ; 10360118 v_mad_f32 v26, v11, v27, v26 ; D282001A 046A370B v_mul_f32_e32 v27, v12, v26 ; 1036350C v_mad_f32 v27, v26, v12, v27 ; D282001B 046E191A v_mad_f32 v15, -v15, v24, v27 ; D282000F 246E310F v_mul_f32_e32 v27, v10, v26 ; 1036350A v_mad_f32 v27, v26, v10, v27 ; D282001B 046E151A v_mad_f32 v16, -v16, v24, v27 ; D2820010 246E3110 v_mul_f32_e32 v27, v11, v26 ; 1036350B v_mad_f32 v26, v26, v11, v27 ; D282001A 046E171A v_mad_f32 v0, -v0, v24, v26 ; D2820000 246A3100 v_mad_f32 v24, v0, v2, 0 ; D2820018 02020500 v_mad_f32 v24, v16, v3, v24 ; D2820018 04620710 v_mul_f32_e32 v26, v4, v15 ; 10341F04 v_mad_f32 v26, v16, v5, v26 ; D282001A 046A0B10 v_mul_f32_e32 v16, v5, v16 ; 10202105 v_mad_f32 v26, v0, v2, v26 ; D282001A 046A0500 v_mad_f32 v0, v0, v2, v16 ; D2820000 04420500 v_mad_f32 v16, v11, v2, 0 ; D2820010 0202050B v_mad_f32 v3, v10, v3, v16 ; D2820003 0442070A v_mul_f32_e32 v4, v4, v12 ; 10081904 v_mad_f32 v4, v10, v5, v4 ; D2820004 04120B0A v_mul_f32_e32 v5, v5, v10 ; 100A1505 v_mad_f32 v4, v11, v2, v4 ; D2820004 0412050B v_mad_f32 v2, v11, v2, v5 ; D2820002 0416050B v_mad_f32 v0, v15, v6, v0 ; D2820000 04020D0F s_buffer_load_dword s6, s[12:15], 0x12 ; C2030D12 s_buffer_load_dword s7, s[12:15], 0x13 ; C2038D13 s_buffer_load_dword s10, s[12:15], 0x26 ; C2050D26 v_add_f32_e64 v5, 0, v24 clamp ; D2060805 00023080 v_add_f32_e64 v10, 0, v26 clamp ; D206080A 00023480 v_max_f32_e32 v5, 0x358637bd, v5 ; 200A0AFF 358637BD v_max_f32_e32 v10, 0x358637bd, v10 ; 201414FF 358637BD v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_log_f32_e64 v10, |v10| ; D34E010A 0000010A v_mad_f32 v2, v12, v6, v2 ; D2820002 040A0D0C v_add_f32_e64 v6, 1.0, s9 ; D2060006 000012F2 v_max_f32_e32 v5, v7, v5 ; 200A0B07 v_max_f32_e32 v10, v7, v10 ; 20141507 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v10, v6, v10 ; 10141506 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_exp_f32_e32 v10, v10 ; 7E144B0A v_mul_f32_e32 v5, v5, v17 ; 100A2305 v_mad_f32 v5, v18, v10, v5 ; D2820005 04161512 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_max_f32_e32 v3, 0x358637bd, v3 ; 200606FF 358637BD v_mul_f32_e32 v3, v3, v17 ; 10062303 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_max_f32_e32 v4, 0x358637bd, v4 ; 200808FF 358637BD v_mad_f32 v3, v18, v4, v3 ; D2820003 040E0912 v_max_f32_e32 v0, v7, v0 ; 20000107 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v0, v20, v0, v5 ; D2820000 04160114 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_max_f32_e32 v2, 0x358637bd, v2 ; 200404FF 358637BD v_mad_f32 v2, v20, v2, v3 ; D2820002 040E0514 v_mov_b32_e32 v3, s8 ; 7E060208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s7, v1, v3 ; D2820001 040E0207 v_mov_b32_e32 v3, s5 ; 7E060205 v_mad_f32 v3, s7, v8, v3 ; D2820003 040E1007 v_mov_b32_e32 v4, s6 ; 7E080206 v_mad_f32 v4, s7, v9, v4 ; D2820004 04121207 v_mul_f32_e32 v5, v21, v13 ; 100A1B15 v_mul_f32_e32 v6, v22, v19 ; 100C2716 v_mul_f32_e32 v7, v23, v25 ; 100E3317 v_mul_f32_e32 v8, v1, v21 ; 10102B01 v_mul_f32_e32 v9, v3, v22 ; 10122D03 v_mul_f32_e32 v10, v4, v23 ; 10142F04 v_mad_f32 v5, v0, v5, s2 ; D2820005 000A0B00 v_mad_f32 v5, v8, v2, v5 ; D2820005 04160508 v_mad_f32 v6, v0, v6, s1 ; D2820006 00060D00 v_mad_f32 v6, v9, v2, v6 ; D2820006 041A0509 v_mad_f32 v0, v0, v7, s0 ; D2820000 00020F00 v_mad_f32 v0, v10, v2, v0 ; D2820000 0402050A v_mad_f32 v1, v1, s4, v5 ; D2820001 04140901 v_mad_f32 v2, v3, s3, v6 ; D2820002 04180703 v_mad_f32 v0, v4, s10, v0 ; D2820000 04001504 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v0, v14 ; 5E001D00 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 32 Code Size: 1104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[5], PERSPECTIVE DCL IN[1], TEXCOORD[6], PERSPECTIVE DCL IN[2], TEXCOORD[7], PERSPECTIVE DCL IN[3], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..12] DCL TEMP[0], LOCAL DCL TEMP[1..4] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, -0.0000} IMM[1] FLT32 { 0.3000, 0.5900, 0.1100, 0.0000} IMM[2] FLT32 { 0.5000, -0.5000, 0.0039, 0.0000} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[1], IN[1] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[1], TEMP[0].xxxx 4: TEX TEMP[2], IN[3], SAMP[0], 2D 5: MAD TEMP[2].xyz, TEMP[2], IMM[0].xxxx, IMM[0].yyyy 6: DP3_SAT TEMP[1].x, TEMP[2], TEMP[1] 7: DP3 TEMP[0].x, TEMP[2], TEMP[2] 8: RSQ TEMP[0].x, TEMP[0].xxxx 9: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 10: MUL TEMP[3].xyz, TEMP[2], TEMP[0].xxxx 11: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].wwww 12: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[8].zzzz 13: CMP TEMP[1].x, TEMP[1].yyyy, IMM[1].wwww, TEMP[2].xxxx 14: TEX TEMP[2], IN[3], SAMP[3], 2D 15: MUL TEMP[1].y, TEMP[1].xxxx, TEMP[2].yyyy 16: MUL TEMP[1].xzw, TEMP[1].xxxx, CONST[7].xyyz 17: TEX TEMP[2], IN[3], SAMP[2], 2D 18: DP3 TEMP[2].w, TEMP[2], IMM[1] 19: LRP TEMP[4].xyz, CONST[8].yyyy, TEMP[2].wwww, TEMP[2] 20: MAD TEMP[1].xzw, TEMP[4].xyyz, TEMP[1], -TEMP[2].xyyz 21: MAD TEMP[1].xyz, TEMP[1].yyyy, TEMP[1].xzww, TEMP[2] 22: DP3 TEMP[1].w, TEMP[1], IMM[1] 23: LRP TEMP[2].xyz, CONST[8].wwww, TEMP[1].wwww, TEMP[1] 24: MUL TEMP[1].xyz, TEMP[2], CONST[9].xxxx 25: MOV TEMP[2].z, IMM[0].zzzz 26: ADD TEMP[2].xyz, TEMP[2].zzzz, -CONST[0] 27: MUL TEMP[1].xyz, TEMP[1], TEMP[2] 28: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 29: DP3 TEMP[0].x, IN[2], IN[2] 30: RSQ TEMP[0].x, TEMP[0].xxxx 31: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 32: MUL TEMP[2].xyz, IN[2], TEMP[0].xxxx 33: DP3 TEMP[1].w, TEMP[2], TEMP[3] 34: MAD TEMP[2].xy, TEMP[1].wwww, IMM[2], IMM[2].xxxx 35: MUL TEMP[2].xy, TEMP[2], TEMP[2] 36: MUL TEMP[2].yzw, TEMP[1].xxyz, TEMP[2].yyyy 37: MUL TEMP[3].xyz, TEMP[1], TEMP[2].xxxx 38: MUL TEMP[2].xyz, TEMP[2].yzww, CONST[11] 39: MAD TEMP[2].xyz, TEMP[3], CONST[10], TEMP[2] 40: TEX TEMP[3], IN[3], SAMP[1], 2D 41: MUL TEMP[3].xyz, TEMP[3], CONST[6] 42: MOV TEMP[4].x, CONST[8].xxxx 43: MAD TEMP[3].xyz, CONST[6].wwww, TEMP[3], TEMP[4].xxxx 44: ADD TEMP[3].xyz, TEMP[3], CONST[0] 45: MAD TEMP[2].xyz, TEMP[2], CONST[12].wwww, TEMP[3] 46: MAD OUT[0].xyz, TEMP[1], CONST[12], TEMP[2] 47: MUL OUT[0].w, IMM[2].zzzz, IN[0].wwww 48: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %84 = fmul float %76, %76 %85 = fmul float %77, %77 %86 = fadd float %85, %84 %87 = fmul float %78, %78 %88 = fadd float %86, %87 %89 = call float @llvm.AMDGPU.rsq.clamped.f32(float %88) %90 = call float @llvm.minnum.f32(float %89, float 0x47EFFFFFE0000000) %91 = fmul float %76, %90 %92 = fmul float %77, %90 %93 = fmul float %78, %90 %94 = bitcast float %82 to i32 %95 = bitcast float %83 to i32 %96 = insertelement <2 x i32> undef, i32 %94, i32 0 %97 = insertelement <2 x i32> %96, i32 %95, i32 1 %98 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %54, <16 x i8> %56, i32 2) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = extractelement <4 x float> %98, i32 2 %102 = fmul float %99, 2.000000e+00 %103 = fadd float %102, -1.000000e+00 %104 = fmul float %100, 2.000000e+00 %105 = fadd float %104, -1.000000e+00 %106 = fmul float %101, 2.000000e+00 %107 = fadd float %106, -1.000000e+00 %108 = fmul float %103, %91 %109 = fmul float %105, %92 %110 = fadd float %109, %108 %111 = fmul float %107, %93 %112 = fadd float %110, %111 %113 = call float @llvm.AMDIL.clamp.(float %112, float 0.000000e+00, float 1.000000e+00) %114 = fmul float %103, %103 %115 = fmul float %105, %105 %116 = fadd float %115, %114 %117 = fmul float %107, %107 %118 = fadd float %116, %117 %119 = call float @llvm.AMDGPU.rsq.clamped.f32(float %118) %120 = call float @llvm.minnum.f32(float %119, float 0x47EFFFFFE0000000) %121 = fmul float %103, %120 %122 = fmul float %105, %120 %123 = fmul float %107, %120 %124 = fadd float %113, 0xBEB0C6F7A0000000 %125 = call float @fabs(float %113) %126 = call float @llvm.pow.f32(float %125, float %40) %127 = call float @llvm.AMDGPU.cndlt(float %124, float 0.000000e+00, float %126) %128 = bitcast float %82 to i32 %129 = bitcast float %83 to i32 %130 = insertelement <2 x i32> undef, i32 %128, i32 0 %131 = insertelement <2 x i32> %130, i32 %129, i32 1 %132 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %131, <32 x i8> %71, <16 x i8> %74, i32 2) %133 = extractelement <4 x float> %132, i32 1 %134 = fmul float %127, %133 %135 = fmul float %127, %35 %136 = fmul float %127, %36 %137 = fmul float %127, %37 %138 = bitcast float %82 to i32 %139 = bitcast float %83 to i32 %140 = insertelement <2 x i32> undef, i32 %138, i32 0 %141 = insertelement <2 x i32> %140, i32 %139, i32 1 %142 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %141, <32 x i8> %65, <16 x i8> %68, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = extractelement <4 x float> %142, i32 1 %145 = extractelement <4 x float> %142, i32 2 %146 = fmul float %143, 0x3FD3333340000000 %147 = fmul float %144, 0x3FE2E147A0000000 %148 = fadd float %147, %146 %149 = fmul float %145, 0x3FBC28F5C0000000 %150 = fadd float %148, %149 %151 = call float @llvm.AMDGPU.lrp(float %39, float %150, float %143) %152 = call float @llvm.AMDGPU.lrp(float %39, float %150, float %144) %153 = call float @llvm.AMDGPU.lrp(float %39, float %150, float %145) %154 = fmul float %151, %135 %155 = fsub float %154, %143 %156 = fmul float %152, %136 %157 = fsub float %156, %144 %158 = fmul float %153, %137 %159 = fsub float %158, %145 %160 = fmul float %134, %155 %161 = fadd float %160, %143 %162 = fmul float %134, %157 %163 = fadd float %162, %144 %164 = fmul float %134, %159 %165 = fadd float %164, %145 %166 = fmul float %161, 0x3FD3333340000000 %167 = fmul float %163, 0x3FE2E147A0000000 %168 = fadd float %167, %166 %169 = fmul float %165, 0x3FBC28F5C0000000 %170 = fadd float %168, %169 %171 = call float @llvm.AMDGPU.lrp(float %41, float %170, float %161) %172 = call float @llvm.AMDGPU.lrp(float %41, float %170, float %163) %173 = call float @llvm.AMDGPU.lrp(float %41, float %170, float %165) %174 = fmul float %171, %42 %175 = fmul float %172, %42 %176 = fmul float %173, %42 %177 = fsub float 1.000000e+00, %24 %178 = fsub float 1.000000e+00, %25 %179 = fsub float 1.000000e+00, %26 %180 = fmul float %174, %177 %181 = fmul float %175, %178 %182 = fmul float %176, %179 %183 = fmul float %180, %30 %184 = fadd float %183, %27 %185 = fmul float %181, %30 %186 = fadd float %185, %28 %187 = fmul float %182, %30 %188 = fadd float %187, %29 %189 = fmul float %79, %79 %190 = fmul float %80, %80 %191 = fadd float %190, %189 %192 = fmul float %81, %81 %193 = fadd float %191, %192 %194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193) %195 = call float @llvm.minnum.f32(float %194, float 0x47EFFFFFE0000000) %196 = fmul float %79, %195 %197 = fmul float %80, %195 %198 = fmul float %81, %195 %199 = fmul float %196, %121 %200 = fmul float %197, %122 %201 = fadd float %200, %199 %202 = fmul float %198, %123 %203 = fadd float %201, %202 %204 = fmul float %203, 5.000000e-01 %205 = fadd float %204, 5.000000e-01 %206 = fmul float %203, -5.000000e-01 %207 = fadd float %206, 5.000000e-01 %208 = fmul float %205, %205 %209 = fmul float %207, %207 %210 = fmul float %184, %209 %211 = fmul float %186, %209 %212 = fmul float %188, %209 %213 = fmul float %184, %208 %214 = fmul float %186, %208 %215 = fmul float %188, %208 %216 = fmul float %210, %46 %217 = fmul float %211, %47 %218 = fmul float %212, %48 %219 = fmul float %213, %43 %220 = fadd float %219, %216 %221 = fmul float %214, %44 %222 = fadd float %221, %217 %223 = fmul float %215, %45 %224 = fadd float %223, %218 %225 = bitcast float %82 to i32 %226 = bitcast float %83 to i32 %227 = insertelement <2 x i32> undef, i32 %225, i32 0 %228 = insertelement <2 x i32> %227, i32 %226, i32 1 %229 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %59, <16 x i8> %62, i32 2) %230 = extractelement <4 x float> %229, i32 0 %231 = extractelement <4 x float> %229, i32 1 %232 = extractelement <4 x float> %229, i32 2 %233 = fmul float %230, %31 %234 = fmul float %231, %32 %235 = fmul float %232, %33 %236 = fmul float %34, %233 %237 = fadd float %236, %38 %238 = fmul float %34, %234 %239 = fadd float %238, %38 %240 = fmul float %34, %235 %241 = fadd float %240, %38 %242 = fadd float %237, %24 %243 = fadd float %239, %25 %244 = fadd float %241, %26 %245 = fmul float %220, %52 %246 = fadd float %245, %242 %247 = fmul float %222, %52 %248 = fadd float %247, %243 %249 = fmul float %224, %52 %250 = fadd float %249, %244 %251 = fmul float %184, %49 %252 = fadd float %251, %246 %253 = fmul float %186, %50 %254 = fadd float %253, %248 %255 = fmul float %188, %51 %256 = fadd float %255, %250 %257 = fmul float %75, 3.906250e-03 %258 = call i32 @llvm.SI.packf16(float %252, float %254) %259 = bitcast i32 %258 to float %260 = call i32 @llvm.SI.packf16(float %256, float %257) %261 = bitcast i32 %260 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %259, float %261, float %259, float %261) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 s_load_dwordx8 s[8:15], s[6:7], 0x8 ; C0C40708 s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 s_load_dwordx4 s[4:7], s[4:5], 0x4 ; C0820504 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[44:51], s[24:27] ; F0800700 00CB0B09 image_sample v0, 2, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[36:43], s[20:23] ; F0800200 00A90009 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[28:35], s[16:19] ; F0800700 00870E09 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[4:7] ; F0800700 00221109 v_mul_f32_e32 v1, v3, v3 ; 10020703 v_mad_f32 v1, v4, v4, v1 ; D2820001 04060904 v_mad_f32 v1, v5, v5, v1 ; D2820001 04060B05 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 s_waitcnt vmcnt(3) ; BF8C0773 v_mad_f32 v9, 2.0, v11, -1.0 ; D2820009 03CE16F4 v_mad_f32 v10, 2.0, v12, -1.0 ; D282000A 03CE18F4 v_mad_f32 v11, 2.0, v13, -1.0 ; D282000B 03CE1AF4 v_min_f32_e32 v1, 0x7f7fffff, v1 ; 1E0202FF 7F7FFFFF v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v4, v1, v4 ; 10080901 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mul_f32_e32 v3, v3, v9 ; 10061303 v_mad_f32 v3, v10, v4, v3 ; D2820003 040E090A v_mad_f32 v1, v11, v1, v3 ; D2820001 040E030B s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mov_b32_e32 v3, 0xb58637bd ; 7E0602FF B58637BD v_add_f32_e32 v3, v1, v3 ; 06060701 v_and_b32_e32 v1, 0x7fffffff, v1 ; 360202FF 7FFFFFFF v_log_f32_e32 v1, v1 ; 7E024F01 s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121 s_buffer_load_dword s6, s[0:3], 0x23 ; C2030123 s_buffer_load_dword s7, s[0:3], 0x24 ; C2038124 s_buffer_load_dword s8, s[0:3], 0x28 ; C2040128 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_legacy_f32_e32 v1, s4, v1 ; 0E020204 v_exp_f32_e32 v1, v1 ; 7E024B01 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_mul_f32_e32 v3, 0x3e99999a, v14 ; 10061CFF 3E99999A v_mov_b32_e32 v4, 0x3f170a3d ; 7E0802FF 3F170A3D v_mad_f32 v3, v15, v4, v3 ; D2820003 040E090F v_mov_b32_e32 v5, 0x3de147ae ; 7E0A02FF 3DE147AE v_mad_f32 v3, v16, v5, v3 ; D2820003 040E0B10 v_sub_f32_e64 v12, 1.0, s5 ; D208000C 00000AF2 v_mul_f32_e32 v13, v14, v12 ; 101A190E s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_buffer_load_dword s9, s[0:3], 0x1d ; C204811D s_buffer_load_dword s10, s[0:3], 0x1e ; C205011E v_mad_f32 v13, s5, v3, v13 ; D282000D 04360605 v_mul_f32_e32 v20, v15, v12 ; 1028190F v_mad_f32 v20, s5, v3, v20 ; D2820014 04520605 v_mul_f32_e32 v12, v16, v12 ; 10181910 v_mad_f32 v3, s5, v3, v12 ; D2820003 04320605 v_mul_f32_e32 v0, v0, v1 ; 10000300 s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s4, v1 ; 10180204 v_mul_f32_e32 v21, s9, v1 ; 102A0209 v_mul_f32_e32 v1, s10, v1 ; 1002020A v_mad_f32 v12, v13, v12, -v14 ; D282000C 843A190D v_mad_f32 v13, v20, v21, -v15 ; D282000D 843E2B14 v_mad_f32 v1, v3, v1, -v16 ; D2820001 84420303 v_mad_f32 v3, v0, v12, v14 ; D2820003 043A1900 v_mad_f32 v12, v0, v13, v15 ; D282000C 043E1B00 v_mad_f32 v0, v0, v1, v16 ; D2820000 04420300 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100 s_buffer_load_dword s10, s[0:3], 0x1 ; C2050101 s_buffer_load_dword s11, s[0:3], 0x2 ; C2058102 s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x18 ; C2080118 s_buffer_load_dword s17, s[0:3], 0x19 ; C2088119 s_buffer_load_dword s18, s[0:3], 0x1a ; C209011A s_buffer_load_dword s19, s[0:3], 0x29 ; C2098129 s_buffer_load_dword s20, s[0:3], 0x2a ; C20A012A s_buffer_load_dword s21, s[0:3], 0x2c ; C20A812C s_buffer_load_dword s22, s[0:3], 0x2d ; C20B012D s_buffer_load_dword s23, s[0:3], 0x2e ; C20B812E s_buffer_load_dword s24, s[0:3], 0x30 ; C20C0130 s_buffer_load_dword s25, s[0:3], 0x31 ; C20C8131 s_buffer_load_dword s26, s[0:3], 0x32 ; C20D0132 s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v17 ; 10022210 v_mul_f32_e32 v13, s17, v18 ; 101A2411 v_mul_f32_e32 v14, s18, v19 ; 101C2612 v_mul_f32_e32 v15, 0x3e99999a, v3 ; 101E06FF 3E99999A v_mad_f32 v4, v12, v4, v15 ; D2820004 043E090C v_mad_f32 v4, v0, v5, v4 ; D2820004 04120B00 v_sub_f32_e64 v5, 1.0, s6 ; D2080005 00000CF2 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mul_f32_e32 v12, v12, v5 ; 10180B0C v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mad_f32 v3, s6, v4, v3 ; D2820003 040E0806 v_mad_f32 v5, s6, v4, v12 ; D2820005 04320806 v_mad_f32 v0, s6, v4, v0 ; D2820000 04020806 v_mul_f32_e32 v3, s7, v3 ; 10060607 v_mul_f32_e32 v4, s7, v5 ; 10080A07 v_mul_f32_e32 v0, s7, v0 ; 10000007 v_sub_f32_e64 v5, 1.0, s9 ; D2080005 000012F2 v_mul_f32_e32 v3, v5, v3 ; 10060705 v_sub_f32_e64 v5, 1.0, s10 ; D2080005 000014F2 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_sub_f32_e64 v5, 1.0, s11 ; D2080005 000016F2 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mov_b32_e32 v5, s12 ; 7E0A020C v_mad_f32 v3, s15, v3, v5 ; D2820003 0416060F v_mov_b32_e32 v5, s13 ; 7E0A020D v_mad_f32 v4, s15, v4, v5 ; D2820004 0416080F v_mov_b32_e32 v5, s14 ; 7E0A020E v_mul_f32_e32 v12, v9, v9 ; 10181309 v_mad_f32 v12, v10, v10, v12 ; D282000C 0432150A v_mad_f32 v12, v11, v11, v12 ; D282000C 0432170B v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C v_mul_f32_e32 v15, v6, v6 ; 101E0D06 v_mad_f32 v15, v7, v7, v15 ; D282000F 043E0F07 v_mad_f32 v15, v8, v8, v15 ; D282000F 043E1108 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mad_f32 v0, s15, v0, v5 ; D2820000 0416000F v_min_f32_e32 v5, 0x7f7fffff, v12 ; 1E0A18FF 7F7FFFFF v_mul_f32_e32 v9, v5, v9 ; 10121305 v_min_f32_e32 v12, 0x7f7fffff, v15 ; 1E181EFF 7F7FFFFF v_mul_f32_e32 v6, v12, v6 ; 100C0D0C v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v9, v5, v10 ; 10121505 v_mul_f32_e32 v7, v12, v7 ; 100E0F0C v_mad_f32 v6, v7, v9, v6 ; D2820006 041A1307 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mul_f32_e32 v7, v12, v8 ; 100E110C v_mad_f32 v5, v7, v5, v6 ; D2820005 041A0B07 v_mad_f32 v6, -0.5, v5, 0.5 ; D2820006 03C20AF1 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_mul_f32_e32 v7, v6, v3 ; 100E0706 v_mul_f32_e32 v7, s21, v7 ; 100E0E15 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mul_f32_e32 v8, v5, v3 ; 10100705 v_mad_f32 v7, v8, s8, v7 ; D2820007 041C1108 v_mul_f32_e32 v8, v6, v4 ; 10100906 v_mul_f32_e32 v8, s22, v8 ; 10101016 v_mul_f32_e32 v9, v5, v4 ; 10120905 v_mad_f32 v8, v9, s19, v8 ; D2820008 04202709 v_mul_f32_e32 v6, v6, v0 ; 100C0106 v_mul_f32_e32 v6, s23, v6 ; 100C0C17 v_mul_f32_e32 v5, v5, v0 ; 100A0105 v_mad_f32 v5, v5, s20, v6 ; D2820005 04182905 v_mov_b32_e32 v6, s5 ; 7E0C0205 v_mad_f32 v1, v1, s4, v6 ; D2820001 04180901 v_mov_b32_e32 v6, s5 ; 7E0C0205 v_mad_f32 v6, v13, s4, v6 ; D2820006 0418090D v_mov_b32_e32 v9, s5 ; 7E120205 v_mad_f32 v9, v14, s4, v9 ; D2820009 0424090E v_add_f32_e32 v1, s9, v1 ; 06020209 v_add_f32_e32 v6, s10, v6 ; 060C0C0A v_add_f32_e32 v9, s11, v9 ; 0612120B v_mad_f32 v1, v7, s0, v1 ; D2820001 04040107 v_mad_f32 v6, v8, s0, v6 ; D2820006 04180108 v_mad_f32 v5, v5, s0, v9 ; D2820005 04240105 v_mad_f32 v1, v3, s24, v1 ; D2820001 04043103 v_mad_f32 v3, v4, s25, v6 ; D2820003 04183304 v_mad_f32 v0, v0, s26, v5 ; D2820000 04143500 v_mul_f32_e32 v2, 0x3b800000, v2 ; 100404FF 3B800000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 24 Code Size: 1052 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL IN[3], TEXCOORD[7], PERSPECTIVE DCL IN[4], FACE, CONSTANT DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..7] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, -0.3333} IMM[1] FLT32 { -0.0000, 8.0000, 0.0398, 0.0000} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[2], IN[2] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[2], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: TEX TEMP[3], IN[0], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: CMP TEMP[1].w, IN[4].xxxx, IMM[0].zzzz, IMM[0].yyyy 15: MUL TEMP[1].w, TEMP[1].wwww, CONST[0].xxxx 16: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 17: DP3 TEMP[1].w, TEMP[3], TEMP[1] 18: MUL TEMP[4].xyz, TEMP[1].wwww, TEMP[3] 19: MAD TEMP[1].xyz, TEMP[4], IMM[0].xxxx, -TEMP[1] 20: TEX TEMP[4], IN[0], SAMP[4], 2D 21: ADD TEMP[4], TEMP[4].xxxx, IMM[0].wwww 22: KILL_IF TEMP[4] 23: UIF CONST[240].xxxx :0 24: MUL TEMP[4].xyz, CONST[7].xyww, IN[3].yyyy 25: MAD TEMP[4].xyz, CONST[6].xyww, IN[3].xxxx, TEMP[4] 26: MAD TEMP[4].xyz, CONST[8].xyww, IN[3].zzzz, TEMP[4] 27: MAD TEMP[4].xyz, CONST[9].xyww, IN[3].wwww, TEMP[4] 28: RCP TEMP[1].w, TEMP[4].zzzz 29: MUL TEMP[4].xy, TEMP[1].wwww, TEMP[4] 30: MAD TEMP[4].xy, TEMP[4], CONST[1], CONST[1].wzzw 31: TEX TEMP[4], TEMP[4], SAMP[0], 2D 32: UIF CONST[240].yyyy :0 33: DP3 TEMP[1].w, IN[3], IN[3] 34: RSQ TEMP[0], |TEMP[1].wwww| 35: MIN TEMP[1].w, IMM[2].xxxx, TEMP[0] 36: RCP TEMP[1].w, TEMP[1].wwww 37: ADD TEMP[1].w, -TEMP[1].wwww, CONST[10].xxxx 38: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[10].yyyy 39: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 40: ADD TEMP[2].w, TEMP[4].wwww, IMM[0].yyyy 41: MAD TEMP[5].xyz, TEMP[1].wwww, TEMP[2].wwww, IMM[0].zzzz 42: ELSE :44 43: MOV TEMP[5].xyz, IMM[0].zzzz 44: ENDIF 45: MUL TEMP[4].xyz, TEMP[4], TEMP[5] 46: ELSE :48 47: MOV TEMP[4].xyz, IMM[0].zzzz 48: ENDIF 49: MOV TEMP[5].z, IMM[0].zzzz 50: ADD TEMP[5].xyz, TEMP[5].zzzz, -CONST[11] 51: TEX TEMP[6], IN[0], SAMP[2], 2D 52: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 53: MAD TEMP[5].xyz, TEMP[5], CONST[4].wwww, CONST[4] 54: MUL TEMP[6].xyz, CONST[12], CONST[12].wwww 55: TEX TEMP[7], IN[0], SAMP[3], 2D 56: MUL TEMP[6].xyz, TEMP[6], TEMP[7] 57: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 58: DP3_SAT TEMP[1].w, TEMP[3], TEMP[2] 59: ADD TEMP[2].w, TEMP[1].wwww, IMM[1].xxxx 60: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 61: ADD TEMP[1].y, TEMP[1].xxxx, IMM[1].xxxx 62: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[13].xxxx 63: MOV TEMP[2].y, IMM[1].yyyy 64: ADD TEMP[1].x, TEMP[2].yyyy, CONST[13].xxxx 65: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 66: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 67: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[5] 68: CMP TEMP[2].xyz, TEMP[2].wwww, IMM[1].wwww, TEMP[2] 69: MUL TEMP[1].xzw, TEMP[6].xyyz, TEMP[1].xxxx 70: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[1].wwww, TEMP[1].xzww 71: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 72: MUL TEMP[1].xyz, TEMP[4], TEMP[1] 73: MUL OUT[0].xyz, TEMP[1], CONST[14] 74: MOV OUT[0].w, IMM[1].wwww 75: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %47 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0 %49 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %52 = bitcast <8 x i32> addrspace(2)* %51 to <32 x i8> addrspace(2)* %53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %55 = bitcast <4 x i32> addrspace(2)* %54 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %87 = fmul float %80, %80 %88 = fmul float %81, %81 %89 = fadd float %88, %87 %90 = fmul float %82, %82 %91 = fadd float %89, %90 %92 = call float @llvm.AMDGPU.rsq.clamped.f32(float %91) %93 = call float @llvm.minnum.f32(float %92, float 0x47EFFFFFE0000000) %94 = fmul float %80, %93 %95 = fmul float %81, %93 %96 = fmul float %82, %93 %97 = fmul float %77, %77 %98 = fmul float %78, %78 %99 = fadd float %98, %97 %100 = fmul float %79, %79 %101 = fadd float %99, %100 %102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101) %103 = call float @llvm.minnum.f32(float %102, float 0x47EFFFFFE0000000) %104 = fmul float %77, %103 %105 = fmul float %78, %103 %106 = fmul float %79, %103 %107 = bitcast float %75 to i32 %108 = bitcast float %76 to i32 %109 = insertelement <2 x i32> undef, i32 %107, i32 0 %110 = insertelement <2 x i32> %109, i32 %108, i32 1 %111 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %110, <32 x i8> %53, <16 x i8> %56, i32 2) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = extractelement <4 x float> %111, i32 2 %115 = fmul float %112, 2.000000e+00 %116 = fadd float %115, -1.000000e+00 %117 = fmul float %113, 2.000000e+00 %118 = fadd float %117, -1.000000e+00 %119 = fmul float %114, 2.000000e+00 %120 = fadd float %119, -1.000000e+00 %121 = fmul float %116, %116 %122 = fmul float %118, %118 %123 = fadd float %122, %121 %124 = fmul float %120, %120 %125 = fadd float %123, %124 %126 = call float @llvm.AMDGPU.rsq.clamped.f32(float %125) %127 = call float @llvm.minnum.f32(float %126, float 0x47EFFFFFE0000000) %128 = fmul float %116, %127 %129 = fmul float %118, %127 %130 = fmul float %120, %127 %131 = call float @llvm.AMDGPU.cndlt(float %18, float 1.000000e+00, float -1.000000e+00) %132 = fmul float %131, %24 %133 = fmul float %132, %128 %134 = fmul float %132, %129 %135 = fmul float %132, %130 %136 = fmul float %133, %94 %137 = fmul float %134, %95 %138 = fadd float %137, %136 %139 = fmul float %135, %96 %140 = fadd float %138, %139 %141 = fmul float %140, %133 %142 = fmul float %140, %134 %143 = fmul float %140, %135 %144 = fmul float %141, 2.000000e+00 %145 = fsub float %144, %94 %146 = fmul float %142, 2.000000e+00 %147 = fsub float %146, %95 %148 = fmul float %143, 2.000000e+00 %149 = fsub float %148, %96 %150 = bitcast float %75 to i32 %151 = bitcast float %76 to i32 %152 = insertelement <2 x i32> undef, i32 %150, i32 0 %153 = insertelement <2 x i32> %152, i32 %151, i32 1 %154 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %153, <32 x i8> %71, <16 x i8> %74, i32 2) %155 = extractelement <4 x float> %154, i32 0 %156 = fadd float %155, 0xBFD554C980000000 %157 = fadd float %155, 0xBFD554C980000000 %158 = fadd float %155, 0xBFD554C980000000 %159 = fadd float %155, 0xBFD554C980000000 %160 = fcmp olt float %156, 0.000000e+00 %161 = fcmp olt float %157, 0.000000e+00 %162 = fcmp olt float %158, 0.000000e+00 %163 = fcmp olt float %159, 0.000000e+00 %164 = or i1 %163, %162 %165 = or i1 %164, %161 %166 = or i1 %165, %160 %167 = select i1 %166, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %167) %168 = bitcast float %46 to i32 %169 = icmp eq i32 %168, 0 br i1 %169, label %ENDIF, label %IF IF: ; preds = %main_body %170 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %171 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %172 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %173 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %174 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %175 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %176 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %177 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %178 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %179 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %180 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %181 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %182 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %183 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %184 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %185 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %186 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %187 = fmul float %179, %84 %188 = fmul float %178, %84 %189 = fmul float %177, %84 %190 = fmul float %182, %83 %191 = fadd float %190, %187 %192 = fmul float %181, %83 %193 = fadd float %192, %188 %194 = fmul float %180, %83 %195 = fadd float %194, %189 %196 = fmul float %176, %85 %197 = fadd float %196, %191 %198 = fmul float %175, %85 %199 = fadd float %198, %193 %200 = fmul float %174, %85 %201 = fadd float %200, %195 %202 = fmul float %173, %86 %203 = fadd float %202, %197 %204 = fmul float %172, %86 %205 = fadd float %204, %199 %206 = fmul float %171, %86 %207 = fadd float %206, %201 %208 = fdiv float 1.000000e+00, %207 %209 = fmul float %208, %203 %210 = fmul float %208, %205 %211 = fmul float %209, %186 %212 = fadd float %211, %183 %213 = fmul float %210, %185 %214 = fadd float %213, %184 %215 = bitcast float %212 to i32 %216 = bitcast float %214 to i32 %217 = insertelement <2 x i32> undef, i32 %215, i32 0 %218 = insertelement <2 x i32> %217, i32 %216, i32 1 %219 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %218, <32 x i8> %48, <16 x i8> %50, i32 2) %220 = extractelement <4 x float> %219, i32 0 %221 = extractelement <4 x float> %219, i32 1 %222 = extractelement <4 x float> %219, i32 2 %223 = bitcast float %170 to i32 %224 = icmp eq i32 %223, 0 br i1 %224, label %ENDIF32, label %IF33 ENDIF: ; preds = %main_body, %ENDIF32 %temp16.0 = phi float [ %330, %ENDIF32 ], [ 1.000000e+00, %main_body ] %temp17.0 = phi float [ %331, %ENDIF32 ], [ 1.000000e+00, %main_body ] %temp18.0 = phi float [ %332, %ENDIF32 ], [ 1.000000e+00, %main_body ] %225 = fsub float 1.000000e+00, %35 %226 = fsub float 1.000000e+00, %36 %227 = fsub float 1.000000e+00, %37 %228 = bitcast float %75 to i32 %229 = bitcast float %76 to i32 %230 = insertelement <2 x i32> undef, i32 %228, i32 0 %231 = insertelement <2 x i32> %230, i32 %229, i32 1 %232 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %231, <32 x i8> %59, <16 x i8> %62, i32 2) %233 = extractelement <4 x float> %232, i32 0 %234 = extractelement <4 x float> %232, i32 1 %235 = extractelement <4 x float> %232, i32 2 %236 = fmul float %225, %233 %237 = fmul float %226, %234 %238 = fmul float %227, %235 %239 = fmul float %236, %28 %240 = fadd float %239, %25 %241 = fmul float %237, %28 %242 = fadd float %241, %26 %243 = fmul float %238, %28 %244 = fadd float %243, %27 %245 = fmul float %38, %41 %246 = fmul float %39, %41 %247 = fmul float %40, %41 %248 = bitcast float %75 to i32 %249 = bitcast float %76 to i32 %250 = insertelement <2 x i32> undef, i32 %248, i32 0 %251 = insertelement <2 x i32> %250, i32 %249, i32 1 %252 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %251, <32 x i8> %65, <16 x i8> %68, i32 2) %253 = extractelement <4 x float> %252, i32 0 %254 = extractelement <4 x float> %252, i32 1 %255 = extractelement <4 x float> %252, i32 2 %256 = fmul float %245, %253 %257 = fmul float %246, %254 %258 = fmul float %247, %255 %259 = fmul float %256, %32 %260 = fadd float %259, %29 %261 = fmul float %257, %32 %262 = fadd float %261, %30 %263 = fmul float %258, %32 %264 = fadd float %263, %31 %265 = fmul float %133, %104 %266 = fmul float %134, %105 %267 = fadd float %266, %265 %268 = fmul float %135, %106 %269 = fadd float %267, %268 %270 = call float @llvm.AMDIL.clamp.(float %269, float 0.000000e+00, float 1.000000e+00) %271 = fadd float %270, 0xBEB0C6F7A0000000 %272 = fmul float %145, %104 %273 = fmul float %147, %105 %274 = fadd float %273, %272 %275 = fmul float %149, %106 %276 = fadd float %274, %275 %277 = call float @llvm.AMDIL.clamp.(float %276, float 0.000000e+00, float 1.000000e+00) %278 = fadd float %277, 0xBEB0C6F7A0000000 %279 = call float @fabs(float %277) %280 = call float @llvm.pow.f32(float %279, float %42) %281 = fadd float %42, 8.000000e+00 %282 = fmul float %281, %280 %283 = fmul float %282, 0x3FA45F3060000000 %284 = fmul float %270, %240 %285 = fmul float %270, %242 %286 = fmul float %270, %244 %287 = call float @llvm.AMDGPU.cndlt(float %271, float 0.000000e+00, float %284) %288 = call float @llvm.AMDGPU.cndlt(float %271, float 0.000000e+00, float %285) %289 = call float @llvm.AMDGPU.cndlt(float %271, float 0.000000e+00, float %286) %290 = fmul float %260, %283 %291 = fmul float %262, %283 %292 = fmul float %264, %283 %293 = call float @llvm.AMDGPU.cndlt(float %278, float 0.000000e+00, float %290) %294 = call float @llvm.AMDGPU.cndlt(float %278, float 0.000000e+00, float %291) %295 = call float @llvm.AMDGPU.cndlt(float %278, float 0.000000e+00, float %292) %296 = fadd float %293, %287 %297 = fadd float %294, %288 %298 = fadd float %295, %289 %299 = fmul float %temp16.0, %296 %300 = fmul float %temp17.0, %297 %301 = fmul float %temp18.0, %298 %302 = fmul float %299, %43 %303 = fmul float %300, %44 %304 = fmul float %301, %45 %305 = call i32 @llvm.SI.packf16(float %302, float %303) %306 = bitcast i32 %305 to float %307 = call i32 @llvm.SI.packf16(float %304, float 0.000000e+00) %308 = bitcast i32 %307 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %306, float %308, float %306, float %308) ret void IF33: ; preds = %IF %309 = extractelement <4 x float> %219, i32 3 %310 = fmul float %83, %83 %311 = fmul float %84, %84 %312 = fadd float %311, %310 %313 = fmul float %85, %85 %314 = fadd float %312, %313 %315 = call float @fabs(float %314) %316 = call float @llvm.AMDGPU.rsq.clamped.f32(float %315) %317 = call float @llvm.minnum.f32(float %316, float 0x47EFFFFFE0000000) %318 = fdiv float 1.000000e+00, %317 %319 = fsub float %33, %318 %320 = fmul float %319, %34 %321 = call float @llvm.AMDIL.clamp.(float %320, float 0.000000e+00, float 1.000000e+00) %322 = fmul float %321, %321 %323 = fadd float %309, -1.000000e+00 %324 = fmul float %322, %323 %325 = fadd float %324, 1.000000e+00 %326 = fmul float %322, %323 %327 = fadd float %326, 1.000000e+00 %328 = fmul float %322, %323 %329 = fadd float %328, 1.000000e+00 br label %ENDIF32 ENDIF32: ; preds = %IF, %IF33 %temp20.0 = phi float [ %325, %IF33 ], [ 1.000000e+00, %IF ] %temp21.0 = phi float [ %327, %IF33 ], [ 1.000000e+00, %IF ] %temp22.0 = phi float [ %329, %IF33 ], [ 1.000000e+00, %IF ] %330 = fmul float %220, %temp20.0 %331 = fmul float %221, %temp21.0 %332 = fmul float %222, %temp22.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_movk_i32 s12, 0xf00 ; B00C0F00 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v10, -1.0, 1.0, vcc ; D200000A 01A9E4F3 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_load_dwordx8 s[28:35], s[6:7], 0x20 ; C0CE0720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800700 00450B02 image_sample v14, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[16:19] ; F0800100 00870E02 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v16, 2.0, v11, -1.0 ; D2820010 03CE16F4 v_mad_f32 v17, 2.0, v12, -1.0 ; D2820011 03CE18F4 v_mad_f32 v12, 2.0, v13, -1.0 ; D282000C 03CE1AF4 v_mov_b32_e32 v11, 0xbeaaa64c ; 7E1602FF BEAAA64C s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v11, v14, v11 ; 0616170E v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v11, 0, -1.0, vcc ; D200000B 01A9E680 v_cndmask_b32_e64 v11, v11, -1.0, vcc ; D200000B 01A9E70B v_cndmask_b32_e64 v11, v11, -1.0, vcc ; D200000B 01A9E70B v_cndmask_b32_e64 v14, v11, -1.0, vcc ; D200000E 01A9E70B s_buffer_load_dword s12, s[0:3], s12 ; C206000C v_mul_f32_e32 v11, v9, v9 ; 10161309 v_mad_f32 v11, v8, v8, v11 ; D282000B 042E1108 v_mad_f32 v11, v7, v7, v11 ; D282000B 042E0F07 v_rsq_clamp_f32_e32 v18, v11 ; 7E24590B v_mul_f32_e32 v11, v4, v4 ; 10160904 v_mad_f32 v11, v5, v5, v11 ; D282000B 042E0B05 v_mad_f32 v11, v6, v6, v11 ; D282000B 042E0D06 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B s_buffer_load_dword s10, s[0:3], 0x0 ; C2050100 s_buffer_load_dword s11, s[0:3], 0x13 ; C2058113 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_buffer_load_dword s9, s[0:3], 0x33 ; C2048133 v_mul_f32_e32 v13, v16, v16 ; 101A2110 v_mad_f32 v13, v17, v17, v13 ; D282000D 04362311 v_mad_f32 v13, v12, v12, v13 ; D282000D 0436190C v_rsq_clamp_f32_e32 v19, v13 ; 7E26590D v_mov_b32_e32 v13, 1.0 ; 7E1A02F2 v_cmpx_le_f32_e32 vcc, 0, v14 ; 7C261C80 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[12:13], 0, s12 ; D10A000C 00001880 v_mov_b32_e32 v14, 1.0 ; 7E1C02F2 v_mov_b32_e32 v15, 1.0 ; 7E1E02F2 s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_cbranch_execz BB0_4 ; BF880000 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00 v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01 v_interp_p1_f32 v14, v0, 1, 3, [m0] ; C8380D00 v_interp_p2_f32 v14, [v14], v1, 1, 3, [m0] ; C8390D01 s_buffer_load_dword s14, s[0:3], 0x1c ; C207011C s_buffer_load_dword s15, s[0:3], 0x18 ; C2078118 v_interp_p1_f32 v15, v0, 2, 3, [m0] ; C83C0E00 v_interp_p2_f32 v15, [v15], v1, 2, 3, [m0] ; C83D0E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 s_buffer_load_dword s28, s[0:3], 0x1d ; C20E011D s_buffer_load_dword s29, s[0:3], 0x19 ; C20E8119 s_buffer_load_dword s30, s[0:3], 0x1f ; C20F011F s_buffer_load_dword s31, s[0:3], 0x1b ; C20F811B v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s14, v14 ; 10021C0E v_mad_f32 v1, s15, v13, v1 ; D2820001 04061A0F s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120 s_buffer_load_dword s15, s[0:3], 0x23 ; C2078123 s_buffer_load_dword s32, s[0:3], 0x27 ; C2100127 v_mul_f32_e32 v20, s28, v14 ; 10281C1C v_mad_f32 v20, s29, v13, v20 ; D2820014 04521A1D v_mul_f32_e32 v21, s30, v14 ; 102A1C1E v_mad_f32 v21, s31, v13, v21 ; D2820015 04561A1F s_buffer_load_dword s28, s[0:3], 0x21 ; C20E0121 s_buffer_load_dword s29, s[0:3], 0x24 ; C20E8124 s_buffer_load_dword s30, s[0:3], 0x25 ; C20F0125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s14, v15, v1 ; D2820001 04061E0E v_mad_f32 v21, s15, v15, v21 ; D2820015 04561E0F v_mad_f32 v21, s32, v0, v21 ; D2820015 04560020 s_buffer_load_dword s14, s[0:3], 0x7 ; C2070107 s_buffer_load_dword s15, s[0:3], 0x6 ; C2078106 s_buffer_load_dword s31, s[0:3], 0x4 ; C20F8104 s_buffer_load_dword s32, s[0:3], 0x5 ; C2100105 s_movk_i32 s33, 0xf04 ; B0210F04 s_buffer_load_dword s33, s[0:3], s33 ; C2108021 v_rcp_f32_e32 v21, v21 ; 7E2A5515 v_mad_f32 v20, s28, v15, v20 ; D2820014 04521E1C v_mad_f32 v1, s29, v0, v1 ; D2820001 0406001D v_mad_f32 v0, s30, v0, v20 ; D2820000 0452001E v_mul_f32_e32 v1, v1, v21 ; 10022B01 v_mul_f32_e32 v0, v0, v21 ; 10002B00 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v20, s14 ; 7E28020E v_mad_f32 v20, s31, v1, v20 ; D2820014 0452021F v_mov_b32_e32 v1, s15 ; 7E02020F v_mad_f32 v21, s32, v0, v1 ; D2820015 04060020 image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[24:27] ; F0800F00 00C41414 v_cmp_ne_i32_e64 s[14:15], 0, s33 ; D10A000E 00004280 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[14:15] ; BE8E240E s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s16, s[0:3], 0x28 ; C2080128 s_buffer_load_dword s17, s[0:3], 0x29 ; C2088129 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v13, v13 ; 10001B0D v_mad_f32 v0, v14, v14, v0 ; D2820000 04021D0E v_mad_f32 v0, v15, v15, v0 ; D2820000 04021F0F v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_add_f32_e32 v1, -1.0, v23 ; 06022EF3 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s16, v0 ; 08000010 v_mul_f32_e32 v0, s17, v0 ; 10000011 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v0, v0, v1, 1.0 ; D2820000 03CA0300 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E v_mul_f32_e32 v15, v0, v20 ; 101E2900 v_mul_f32_e32 v14, v0, v21 ; 101C2B00 v_mul_f32_e32 v13, v0, v22 ; 101A2D00 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mul_f32_e32 v1, s10, v10 ; 1002140A v_mov_b32_e32 v0, s11 ; 7E00020B v_min_f32_e32 v18, 0x7f7fffff, v18 ; 1E2424FF 7F7FFFFF v_min_f32_e32 v10, 0x7f7fffff, v19 ; 1E1426FF 7F7FFFFF v_mul_f32_e32 v16, v10, v16 ; 1020210A v_mul_f32_e32 v17, v10, v17 ; 1022230A s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111 s_buffer_load_dword s15, s[0:3], 0x12 ; C2078112 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x15 ; C2088115 s_buffer_load_dword s18, s[0:3], 0x16 ; C2090116 s_buffer_load_dword s22, s[0:3], 0x2c ; C20B012C s_buffer_load_dword s23, s[0:3], 0x2d ; C20B812D s_buffer_load_dword s24, s[0:3], 0x2e ; C20C012E s_buffer_load_dword s19, s[0:3], 0x30 ; C2098130 s_buffer_load_dword s20, s[0:3], 0x31 ; C20A0131 s_buffer_load_dword s21, s[0:3], 0x32 ; C20A8132 s_buffer_load_dword s12, s[0:3], 0x34 ; C2060134 s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138 s_buffer_load_dword s11, s[0:3], 0x39 ; C2058139 s_buffer_load_dword s0, s[0:3], 0x3a ; C200013A v_mul_f32_e32 v19, v10, v12 ; 1026190A v_mul_f32_e32 v10, v16, v1 ; 10140310 v_mul_f32_e32 v12, v17, v1 ; 10180311 v_mul_f32_e32 v16, v19, v1 ; 10200313 v_mul_f32_e32 v1, v18, v9 ; 10021312 v_mul_f32_e32 v1, v1, v10 ; 10021501 v_mul_f32_e32 v17, v18, v8 ; 10221112 v_mad_f32 v1, v12, v17, v1 ; D2820001 0406230C v_mul_f32_e32 v17, v18, v7 ; 10220F12 v_mad_f32 v1, v16, v17, v1 ; D2820001 04062310 v_mul_f32_e32 v17, v10, v1 ; 1022030A v_mad_f32 v17, v1, v10, v17 ; D2820011 04461501 v_mad_f32 v9, -v9, v18, v17 ; D2820009 24462509 v_mul_f32_e32 v17, v12, v1 ; 1022030C s_load_dwordx4 s[28:31], s[4:5], 0x8 ; C08E0508 s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[44:51], s[6:7], 0x18 ; C0D60718 v_mad_f32 v17, v1, v12, v17 ; D2820011 04461901 v_mad_f32 v8, -v8, v18, v17 ; D2820008 24462508 v_mul_f32_e32 v17, v16, v1 ; 10220310 v_mad_f32 v1, v1, v16, v17 ; D2820001 04462101 v_mad_f32 v7, -v7, v18, v1 ; D2820007 24062507 v_min_f32_e32 v1, 0x7f7fffff, v11 ; 1E0216FF 7F7FFFFF v_mul_f32_e32 v11, v1, v4 ; 10160901 v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mul_f32_e32 v6, v1, v6 ; 100C0D01 v_mov_b32_e32 v1, s8 ; 7E020208 v_mov_b32_e32 v4, s9 ; 7E080209 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[28:31] ; F0800700 00E91102 image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[44:51], s[32:35] ; F0800700 010B1402 v_mul_f32_e32 v2, v11, v10 ; 1004150B v_mad_f32 v2, v12, v5, v2 ; D2820002 040A0B0C v_mad_f32 v2, v16, v6, v2 ; D2820002 040A0D10 v_mul_f32_e32 v3, v11, v9 ; 1006130B v_mad_f32 v3, v8, v5, v3 ; D2820003 040E0B08 v_mad_f32 v3, v7, v6, v3 ; D2820003 040E0D07 v_sub_f32_e64 v5, 1.0, s22 ; D2080005 00002CF2 v_sub_f32_e64 v6, 1.0, s23 ; D2080006 00002EF2 v_sub_f32_e64 v7, 1.0, s24 ; D2080007 000030F2 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v5, v17, v5 ; 100A0B11 v_mul_f32_e32 v6, v18, v6 ; 100C0D12 v_mul_f32_e32 v7, v19, v7 ; 100E0F13 v_mul_f32_e32 v8, s19, v4 ; 10100813 v_mul_f32_e32 v9, s20, v4 ; 10120814 v_mul_f32_e32 v4, s21, v4 ; 10080815 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v20, v8 ; 10101114 v_mul_f32_e32 v9, v21, v9 ; 10121315 v_mul_f32_e32 v4, v22, v4 ; 10080916 v_mad_f32 v5, v0, v5, s13 ; D2820005 00360B00 v_mad_f32 v6, v6, v0, s14 ; D2820006 003A0106 v_mad_f32 v0, v7, v0, s15 ; D2820000 003E0107 v_mad_f32 v7, v1, v8, s16 ; D2820007 00421101 v_mad_f32 v8, v9, v1, s17 ; D2820008 00460309 v_mad_f32 v1, v4, v1, s18 ; D2820001 004A0304 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_and_b32_e32 v4, 0x7fffffff, v3 ; 360806FF 7FFFFFFF v_log_f32_e32 v4, v4 ; 7E084F04 v_mov_b32_e32 v9, 0xb58637bd ; 7E1202FF B58637BD v_add_f32_e32 v10, v9, v2 ; 06140509 v_add_f32_e32 v3, v9, v3 ; 06060709 v_mul_legacy_f32_e32 v4, s12, v4 ; 0E08080C v_exp_f32_e32 v4, v4 ; 7E084B04 v_mov_b32_e32 v9, 0x41000000 ; 7E1202FF 41000000 v_add_f32_e32 v9, s12, v9 ; 0612120C v_mul_f32_e32 v4, v4, v9 ; 10081304 v_mul_f32_e32 v4, 0x3d22f983, v4 ; 100808FF 3D22F983 v_mul_f32_e32 v5, v5, v2 ; 100A0505 v_mul_f32_e32 v6, v6, v2 ; 100C0506 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v2, v5, 0, vcc ; D2000002 01A90105 v_cndmask_b32_e64 v5, v6, 0, vcc ; D2000005 01A90106 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v6, v4, v7 ; 100C0F04 v_mul_f32_e32 v7, v4, v8 ; 100E1104 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v3, v6, 0, vcc ; D2000003 01A90106 v_cndmask_b32_e64 v4, v7, 0, vcc ; D2000004 01A90107 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_add_f32_e32 v2, v2, v3 ; 06040702 v_add_f32_e32 v3, v5, v4 ; 06060905 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mul_f32_e32 v1, v2, v15 ; 10021F02 v_mul_f32_e32 v2, v3, v14 ; 10041D03 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mul_f32_e32 v1, s10, v1 ; 1002020A v_mul_f32_e32 v2, s11, v2 ; 1004040B v_mul_f32_e32 v0, s0, v0 ; 10000000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 24 Code Size: 1384 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL IN[3], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..12] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { -0.0000, 0.3000, 0.5900, 0.1100} IMM[2] FLT32 { 0.8000, 15.0000, 0.9151, 0.0000} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[2], IN[2] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[2], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: TEX TEMP[3], IN[0], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[5].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[5].xyz, TEMP[5], IMM[0].xxxx, -TEMP[1] 17: UIF CONST[240].xxxx :0 18: MUL TEMP[6].xyz, CONST[7].xyww, IN[3].yyyy 19: MAD TEMP[6].xyz, CONST[6].xyww, IN[3].xxxx, TEMP[6] 20: MAD TEMP[6].xyz, CONST[8].xyww, IN[3].zzzz, TEMP[6] 21: MAD TEMP[6].xyz, CONST[9].xyww, IN[3].wwww, TEMP[6] 22: RCP TEMP[1].w, TEMP[6].zzzz 23: MUL TEMP[6].xy, TEMP[1].wwww, TEMP[6] 24: MAD TEMP[6].xy, TEMP[6], CONST[1], CONST[1].wzzw 25: TEX TEMP[6], TEMP[6], SAMP[0], 2D 26: UIF CONST[240].yyyy :0 27: DP3 TEMP[1].w, IN[3], IN[3] 28: RSQ TEMP[0], |TEMP[1].wwww| 29: MIN TEMP[1].w, IMM[3].xxxx, TEMP[0] 30: RCP TEMP[1].w, TEMP[1].wwww 31: ADD TEMP[1].w, -TEMP[1].wwww, CONST[0].xxxx 32: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[0].yyyy 33: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 34: ADD TEMP[2].w, TEMP[6].wwww, IMM[0].yyyy 35: MAD TEMP[7].xyz, TEMP[1].wwww, TEMP[2].wwww, IMM[0].zzzz 36: ELSE :38 37: MOV TEMP[7].xyz, IMM[0].zzzz 38: ENDIF 39: MUL TEMP[6].xyz, TEMP[6], TEMP[7] 40: ELSE :42 41: MOV TEMP[6].xyz, IMM[0].zzzz 42: ENDIF 43: MOV TEMP[7].z, IMM[0].zzzz 44: ADD TEMP[7].xyz, TEMP[7].zzzz, -CONST[10] 45: DP3 TEMP[1].x, TEMP[3], TEMP[1] 46: MAX TEMP[2].w, TEMP[1].xxxx, IMM[0].wwww 47: ADD TEMP[1].x, -TEMP[2].wwww, IMM[0].zzzz 48: ADD TEMP[1].y, |TEMP[1].xxxx|, IMM[1].xxxx 49: MUL TEMP[1].z, |TEMP[1].xxxx|, |TEMP[1].xxxx| 50: MUL TEMP[1].x, TEMP[1].zzzz, |TEMP[1].xxxx| 51: MUL TEMP[1].x, TEMP[1].xxxx, CONST[12].xxxx 52: TEX TEMP[8], IN[0], SAMP[2], 2D 53: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[8].zzzz 54: TEX TEMP[9], IN[0], SAMP[3], 2D 55: MUL TEMP[1].xzw, TEMP[1].xxxx, TEMP[9].xyyz 56: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[0].wwww, TEMP[1].xzww 57: TEX TEMP[10], IN[0], SAMP[4], 2D 58: LRP TEMP[11].xyz, CONST[12].yyyy, TEMP[10], TEMP[9] 59: DP3 TEMP[1].w, TEMP[11], IMM[1].yzww 60: LRP TEMP[10].xyz, CONST[12].zzzz, TEMP[1].wwww, TEMP[11] 61: MOV_SAT TEMP[2].w, TEMP[2].wwww 62: ADD TEMP[1].w, TEMP[2].wwww, IMM[1].xxxx 63: POW TEMP[3].w, |TEMP[2].wwww|, CONST[12].wwww 64: CMP TEMP[1].w, TEMP[1].wwww, IMM[0].wwww, TEMP[3].wwww 65: MUL TEMP[12].xyz, TEMP[1].wwww, CONST[11] 66: MUL TEMP[1].w, TEMP[8].yyyy, TEMP[1].wwww 67: MAD TEMP[8].xyw, TEMP[10].xyzz, TEMP[12].xyzz, -TEMP[11].xyzz 68: MAD TEMP[8].xyw, TEMP[1].wwww, TEMP[8], TEMP[11].xyzz 69: DP3 TEMP[1].w, TEMP[9], IMM[1].yzww 70: LRP TEMP[10].xyz, CONST[13].xxxx, TEMP[1].wwww, TEMP[9] 71: ADD TEMP[9].xyz, TEMP[10], TEMP[10] 72: MAX TEMP[10].xyz, |TEMP[9]|, -IMM[1].xxxx 73: DP3 TEMP[1].w, TEMP[5], TEMP[3] 74: MAX TEMP[2].w, TEMP[1].wwww, IMM[0].wwww 75: MIN TEMP[1].w, TEMP[2].wwww, IMM[2].xxxx 76: MUL TEMP[1].w, TEMP[1].wwww, CONST[13].yyyy 77: MUL TEMP[1].w, TEMP[1].wwww, TEMP[8].zzzz 78: MAD TEMP[3].xyz, TEMP[10], TEMP[10], -TEMP[8].xyww 79: MAD TEMP[3].xyz, TEMP[1].wwww, TEMP[3], TEMP[8].xyww 80: ADD TEMP[1].xyz, TEMP[1], TEMP[3] 81: DP3 TEMP[1].w, TEMP[1], IMM[1].yzww 82: LRP TEMP[3].xyz, CONST[13].zzzz, TEMP[1].wwww, TEMP[1] 83: MUL TEMP[1].xyz, TEMP[3], CONST[13].wwww 84: MUL TEMP[1].xyz, TEMP[7], TEMP[1] 85: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 86: TEX TEMP[3], IN[0], SAMP[5], 2D 87: MUL TEMP[3].xyz, TEMP[3], CONST[14].xxxx 88: MUL TEMP[3].xyz, TEMP[3], CONST[13].wwww 89: MAD TEMP[3].xyz, TEMP[3], CONST[5].wwww, CONST[5] 90: DP3_SAT TEMP[1].w, TEMP[4], TEMP[2] 91: ADD TEMP[2].w, TEMP[1].wwww, IMM[1].xxxx 92: CMP TEMP[1].w, TEMP[2].wwww, IMM[0].wwww, TEMP[1].wwww 93: DP3_SAT TEMP[2].x, TEMP[5], TEMP[2] 94: ADD TEMP[2].y, TEMP[2].xxxx, IMM[1].xxxx 95: POW TEMP[3].w, |TEMP[2].xxxx|, IMM[2].yyyy 96: MUL TEMP[2].x, TEMP[3].wwww, IMM[2].zzzz 97: MUL TEMP[2].xzw, TEMP[3].xyyz, TEMP[2].xxxx 98: CMP TEMP[2].xyz, TEMP[2].yyyy, IMM[0].wwww, TEMP[2].xzww 99: MAD TEMP[1].xyz, TEMP[1], TEMP[1].wwww, TEMP[2] 100: MUL TEMP[1].xyz, TEMP[6], TEMP[1] 101: MUL OUT[0].xyz, TEMP[1], CONST[15] 102: MOV OUT[0].w, IMM[0].wwww 103: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 220) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %72 = bitcast <8 x i32> addrspace(2)* %71 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %75 = bitcast <4 x i32> addrspace(2)* %74 to <16 x i8> addrspace(2)* %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %78 = bitcast <8 x i32> addrspace(2)* %77 to <32 x i8> addrspace(2)* %79 = load <32 x i8>, <32 x i8> addrspace(2)* %78, align 32, !tbaa !0 %80 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %81 = bitcast <4 x i32> addrspace(2)* %80 to <16 x i8> addrspace(2)* %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %95 = fmul float %88, %88 %96 = fmul float %89, %89 %97 = fadd float %96, %95 %98 = fmul float %90, %90 %99 = fadd float %97, %98 %100 = call float @llvm.AMDGPU.rsq.clamped.f32(float %99) %101 = call float @llvm.minnum.f32(float %100, float 0x47EFFFFFE0000000) %102 = fmul float %88, %101 %103 = fmul float %89, %101 %104 = fmul float %90, %101 %105 = fmul float %85, %85 %106 = fmul float %86, %86 %107 = fadd float %106, %105 %108 = fmul float %87, %87 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) %111 = call float @llvm.minnum.f32(float %110, float 0x47EFFFFFE0000000) %112 = fmul float %85, %111 %113 = fmul float %86, %111 %114 = fmul float %87, %111 %115 = bitcast float %83 to i32 %116 = bitcast float %84 to i32 %117 = insertelement <2 x i32> undef, i32 %115, i32 0 %118 = insertelement <2 x i32> %117, i32 %116, i32 1 %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %55, <16 x i8> %58, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = fmul float %120, 2.000000e+00 %124 = fadd float %123, -1.000000e+00 %125 = fmul float %121, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %122, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %124, %124 %130 = fmul float %126, %126 %131 = fadd float %130, %129 %132 = fmul float %128, %128 %133 = fadd float %131, %132 %134 = call float @llvm.AMDGPU.rsq.clamped.f32(float %133) %135 = call float @llvm.minnum.f32(float %134, float 0x47EFFFFFE0000000) %136 = fmul float %124, %135 %137 = fmul float %126, %135 %138 = fmul float %128, %135 %139 = fmul float %136, %102 %140 = fmul float %137, %103 %141 = fadd float %140, %139 %142 = fmul float %138, %104 %143 = fadd float %141, %142 %144 = fmul float %143, %136 %145 = fmul float %143, %137 %146 = fmul float %143, %138 %147 = fmul float %144, 2.000000e+00 %148 = fsub float %147, %102 %149 = fmul float %145, 2.000000e+00 %150 = fsub float %149, %103 %151 = fmul float %146, 2.000000e+00 %152 = fsub float %151, %104 %153 = bitcast float %52 to i32 %154 = icmp eq i32 %153, 0 br i1 %154, label %ENDIF, label %IF IF: ; preds = %main_body %155 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %156 = load <16 x i8>, <16 x i8> addrspace(2)* %155, align 16, !tbaa !0 %157 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %158 = load <32 x i8>, <32 x i8> addrspace(2)* %157, align 32, !tbaa !0 %159 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %160 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %161 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %162 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %163 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %164 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %165 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %166 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %167 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %168 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %169 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %170 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %171 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %172 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %173 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %174 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %175 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %176 = fmul float %168, %92 %177 = fmul float %167, %92 %178 = fmul float %166, %92 %179 = fmul float %171, %91 %180 = fadd float %179, %176 %181 = fmul float %170, %91 %182 = fadd float %181, %177 %183 = fmul float %169, %91 %184 = fadd float %183, %178 %185 = fmul float %165, %93 %186 = fadd float %185, %180 %187 = fmul float %164, %93 %188 = fadd float %187, %182 %189 = fmul float %163, %93 %190 = fadd float %189, %184 %191 = fmul float %162, %94 %192 = fadd float %191, %186 %193 = fmul float %161, %94 %194 = fadd float %193, %188 %195 = fmul float %160, %94 %196 = fadd float %195, %190 %197 = fdiv float 1.000000e+00, %196 %198 = fmul float %197, %192 %199 = fmul float %197, %194 %200 = fmul float %198, %175 %201 = fadd float %200, %172 %202 = fmul float %199, %174 %203 = fadd float %202, %173 %204 = bitcast float %201 to i32 %205 = bitcast float %203 to i32 %206 = insertelement <2 x i32> undef, i32 %204, i32 0 %207 = insertelement <2 x i32> %206, i32 %205, i32 1 %208 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %207, <32 x i8> %158, <16 x i8> %156, i32 2) %209 = extractelement <4 x float> %208, i32 0 %210 = extractelement <4 x float> %208, i32 1 %211 = extractelement <4 x float> %208, i32 2 %212 = bitcast float %159 to i32 %213 = icmp eq i32 %212, 0 br i1 %213, label %ENDIF52, label %IF53 ENDIF: ; preds = %main_body, %ENDIF52 %temp24.0 = phi float [ %436, %ENDIF52 ], [ 1.000000e+00, %main_body ] %temp25.0 = phi float [ %437, %ENDIF52 ], [ 1.000000e+00, %main_body ] %temp26.0 = phi float [ %438, %ENDIF52 ], [ 1.000000e+00, %main_body ] %214 = fsub float 1.000000e+00, %34 %215 = fsub float 1.000000e+00, %35 %216 = fsub float 1.000000e+00, %36 %217 = fmul float %124, %102 %218 = fmul float %126, %103 %219 = fadd float %218, %217 %220 = fmul float %128, %104 %221 = fadd float %219, %220 %222 = call float @llvm.maxnum.f32(float %221, float 0.000000e+00) %223 = fsub float 1.000000e+00, %222 %224 = call float @fabs(float %223) %225 = fadd float %224, 0xBEB0C6F7A0000000 %226 = call float @fabs(float %223) %227 = call float @fabs(float %223) %228 = fmul float %226, %227 %229 = call float @fabs(float %223) %230 = fmul float %228, %229 %231 = fmul float %230, %40 %232 = bitcast float %83 to i32 %233 = bitcast float %84 to i32 %234 = insertelement <2 x i32> undef, i32 %232, i32 0 %235 = insertelement <2 x i32> %234, i32 %233, i32 1 %236 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %235, <32 x i8> %61, <16 x i8> %64, i32 2) %237 = extractelement <4 x float> %236, i32 1 %238 = extractelement <4 x float> %236, i32 2 %239 = fmul float %231, %238 %240 = bitcast float %83 to i32 %241 = bitcast float %84 to i32 %242 = insertelement <2 x i32> undef, i32 %240, i32 0 %243 = insertelement <2 x i32> %242, i32 %241, i32 1 %244 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %243, <32 x i8> %67, <16 x i8> %70, i32 2) %245 = extractelement <4 x float> %244, i32 0 %246 = extractelement <4 x float> %244, i32 1 %247 = extractelement <4 x float> %244, i32 2 %248 = fmul float %239, %245 %249 = fmul float %239, %246 %250 = fmul float %239, %247 %251 = call float @llvm.AMDGPU.cndlt(float %225, float 0.000000e+00, float %248) %252 = call float @llvm.AMDGPU.cndlt(float %225, float 0.000000e+00, float %249) %253 = call float @llvm.AMDGPU.cndlt(float %225, float 0.000000e+00, float %250) %254 = bitcast float %83 to i32 %255 = bitcast float %84 to i32 %256 = insertelement <2 x i32> undef, i32 %254, i32 0 %257 = insertelement <2 x i32> %256, i32 %255, i32 1 %258 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %257, <32 x i8> %73, <16 x i8> %76, i32 2) %259 = extractelement <4 x float> %258, i32 0 %260 = extractelement <4 x float> %258, i32 1 %261 = extractelement <4 x float> %258, i32 2 %262 = call float @llvm.AMDGPU.lrp(float %41, float %259, float %245) %263 = call float @llvm.AMDGPU.lrp(float %41, float %260, float %246) %264 = call float @llvm.AMDGPU.lrp(float %41, float %261, float %247) %265 = fmul float %262, 0x3FD3333340000000 %266 = fmul float %263, 0x3FE2E147A0000000 %267 = fadd float %266, %265 %268 = fmul float %264, 0x3FBC28F5C0000000 %269 = fadd float %267, %268 %270 = call float @llvm.AMDGPU.lrp(float %42, float %269, float %262) %271 = call float @llvm.AMDGPU.lrp(float %42, float %269, float %263) %272 = call float @llvm.AMDGPU.lrp(float %42, float %269, float %264) %273 = call float @llvm.AMDIL.clamp.(float %222, float 0.000000e+00, float 1.000000e+00) %274 = fadd float %273, 0xBEB0C6F7A0000000 %275 = call float @fabs(float %273) %276 = call float @llvm.pow.f32(float %275, float %43) %277 = call float @llvm.AMDGPU.cndlt(float %274, float 0.000000e+00, float %276) %278 = fmul float %277, %37 %279 = fmul float %277, %38 %280 = fmul float %277, %39 %281 = fmul float %237, %277 %282 = fmul float %270, %278 %283 = fsub float %282, %262 %284 = fmul float %271, %279 %285 = fsub float %284, %263 %286 = fmul float %272, %280 %287 = fsub float %286, %264 %288 = fmul float %281, %283 %289 = fadd float %288, %262 %290 = fmul float %281, %285 %291 = fadd float %290, %263 %292 = fmul float %281, %287 %293 = fadd float %292, %264 %294 = fmul float %245, 0x3FD3333340000000 %295 = fmul float %246, 0x3FE2E147A0000000 %296 = fadd float %295, %294 %297 = fmul float %247, 0x3FBC28F5C0000000 %298 = fadd float %296, %297 %299 = call float @llvm.AMDGPU.lrp(float %44, float %298, float %245) %300 = call float @llvm.AMDGPU.lrp(float %44, float %298, float %246) %301 = call float @llvm.AMDGPU.lrp(float %44, float %298, float %247) %302 = fadd float %299, %299 %303 = fadd float %300, %300 %304 = fadd float %301, %301 %305 = call float @fabs(float %302) %306 = call float @llvm.maxnum.f32(float %305, float 0x3EB0C6F7A0000000) %307 = call float @fabs(float %303) %308 = call float @llvm.maxnum.f32(float %307, float 0x3EB0C6F7A0000000) %309 = call float @fabs(float %304) %310 = call float @llvm.maxnum.f32(float %309, float 0x3EB0C6F7A0000000) %311 = fmul float %148, %124 %312 = fmul float %150, %126 %313 = fadd float %312, %311 %314 = fmul float %152, %128 %315 = fadd float %313, %314 %316 = call float @llvm.maxnum.f32(float %315, float 0.000000e+00) %317 = call float @llvm.minnum.f32(float %316, float 0x3FE99999A0000000) %318 = fmul float %317, %45 %319 = fmul float %318, %238 %320 = fmul float %306, %306 %321 = fsub float %320, %289 %322 = fmul float %308, %308 %323 = fsub float %322, %291 %324 = fmul float %310, %310 %325 = fsub float %324, %293 %326 = fmul float %319, %321 %327 = fadd float %326, %289 %328 = fmul float %319, %323 %329 = fadd float %328, %291 %330 = fmul float %319, %325 %331 = fadd float %330, %293 %332 = fadd float %251, %327 %333 = fadd float %252, %329 %334 = fadd float %253, %331 %335 = fmul float %332, 0x3FD3333340000000 %336 = fmul float %333, 0x3FE2E147A0000000 %337 = fadd float %336, %335 %338 = fmul float %334, 0x3FBC28F5C0000000 %339 = fadd float %337, %338 %340 = call float @llvm.AMDGPU.lrp(float %46, float %339, float %332) %341 = call float @llvm.AMDGPU.lrp(float %46, float %339, float %333) %342 = call float @llvm.AMDGPU.lrp(float %46, float %339, float %334) %343 = fmul float %340, %47 %344 = fmul float %341, %47 %345 = fmul float %342, %47 %346 = fmul float %214, %343 %347 = fmul float %215, %344 %348 = fmul float %216, %345 %349 = fmul float %346, %29 %350 = fadd float %349, %26 %351 = fmul float %347, %29 %352 = fadd float %351, %27 %353 = fmul float %348, %29 %354 = fadd float %353, %28 %355 = bitcast float %83 to i32 %356 = bitcast float %84 to i32 %357 = insertelement <2 x i32> undef, i32 %355, i32 0 %358 = insertelement <2 x i32> %357, i32 %356, i32 1 %359 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %358, <32 x i8> %79, <16 x i8> %82, i32 2) %360 = extractelement <4 x float> %359, i32 0 %361 = extractelement <4 x float> %359, i32 1 %362 = extractelement <4 x float> %359, i32 2 %363 = fmul float %360, %48 %364 = fmul float %361, %48 %365 = fmul float %362, %48 %366 = fmul float %363, %47 %367 = fmul float %364, %47 %368 = fmul float %365, %47 %369 = fmul float %366, %33 %370 = fadd float %369, %30 %371 = fmul float %367, %33 %372 = fadd float %371, %31 %373 = fmul float %368, %33 %374 = fadd float %373, %32 %375 = fmul float %136, %112 %376 = fmul float %137, %113 %377 = fadd float %376, %375 %378 = fmul float %138, %114 %379 = fadd float %377, %378 %380 = call float @llvm.AMDIL.clamp.(float %379, float 0.000000e+00, float 1.000000e+00) %381 = fadd float %380, 0xBEB0C6F7A0000000 %382 = call float @llvm.AMDGPU.cndlt(float %381, float 0.000000e+00, float %380) %383 = fmul float %148, %112 %384 = fmul float %150, %113 %385 = fadd float %384, %383 %386 = fmul float %152, %114 %387 = fadd float %385, %386 %388 = call float @llvm.AMDIL.clamp.(float %387, float 0.000000e+00, float 1.000000e+00) %389 = fadd float %388, 0xBEB0C6F7A0000000 %390 = call float @fabs(float %388) %391 = call float @llvm.pow.f32(float %390, float 1.500000e+01) %392 = fmul float %391, 0x3FED48D5A0000000 %393 = fmul float %370, %392 %394 = fmul float %372, %392 %395 = fmul float %374, %392 %396 = call float @llvm.AMDGPU.cndlt(float %389, float 0.000000e+00, float %393) %397 = call float @llvm.AMDGPU.cndlt(float %389, float 0.000000e+00, float %394) %398 = call float @llvm.AMDGPU.cndlt(float %389, float 0.000000e+00, float %395) %399 = fmul float %350, %382 %400 = fadd float %399, %396 %401 = fmul float %352, %382 %402 = fadd float %401, %397 %403 = fmul float %354, %382 %404 = fadd float %403, %398 %405 = fmul float %temp24.0, %400 %406 = fmul float %temp25.0, %402 %407 = fmul float %temp26.0, %404 %408 = fmul float %405, %49 %409 = fmul float %406, %50 %410 = fmul float %407, %51 %411 = call i32 @llvm.SI.packf16(float %408, float %409) %412 = bitcast i32 %411 to float %413 = call i32 @llvm.SI.packf16(float %410, float 0.000000e+00) %414 = bitcast i32 %413 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %412, float %414, float %412, float %414) ret void IF53: ; preds = %IF %415 = extractelement <4 x float> %208, i32 3 %416 = fmul float %91, %91 %417 = fmul float %92, %92 %418 = fadd float %417, %416 %419 = fmul float %93, %93 %420 = fadd float %418, %419 %421 = call float @fabs(float %420) %422 = call float @llvm.AMDGPU.rsq.clamped.f32(float %421) %423 = call float @llvm.minnum.f32(float %422, float 0x47EFFFFFE0000000) %424 = fdiv float 1.000000e+00, %423 %425 = fsub float %24, %424 %426 = fmul float %425, %25 %427 = call float @llvm.AMDIL.clamp.(float %426, float 0.000000e+00, float 1.000000e+00) %428 = fmul float %427, %427 %429 = fadd float %415, -1.000000e+00 %430 = fmul float %428, %429 %431 = fadd float %430, 1.000000e+00 %432 = fmul float %428, %429 %433 = fadd float %432, 1.000000e+00 %434 = fmul float %428, %429 %435 = fadd float %434, 1.000000e+00 br label %ENDIF52 ENDIF52: ; preds = %IF, %IF53 %temp28.0 = phi float [ %431, %IF53 ], [ 1.000000e+00, %IF ] %temp29.0 = phi float [ %433, %IF53 ], [ 1.000000e+00, %IF ] %temp30.0 = phi float [ %435, %IF53 ], [ 1.000000e+00, %IF ] %436 = fmul float %209, %temp28.0 %437 = fmul float %210, %temp29.0 %438 = fmul float %211, %temp30.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_movk_i32 s0, 0xf00 ; B0000F00 s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_interp_p1_f32 v8, v0, 1, 1, [m0] ; C8200500 v_interp_p2_f32 v8, [v8], v1, 1, 1, [m0] ; C8210501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[16:19], s0 ; C2011000 v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600 v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601 v_interp_p1_f32 v12, v0, 0, 2, [m0] ; C8300800 v_interp_p2_f32 v12, [v12], v1, 0, 2, [m0] ; C8310801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800700 00450F02 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v13, 2.0, v15, -1.0 ; D282000D 03CE1EF4 v_mad_f32 v14, 2.0, v16, -1.0 ; D282000E 03CE20F4 v_mad_f32 v15, 2.0, v17, -1.0 ; D282000F 03CE22F4 v_mul_f32_e32 v5, v12, v12 ; 100A190C v_mad_f32 v5, v11, v11, v5 ; D2820005 0416170B v_mad_f32 v5, v10, v10, v5 ; D2820005 0416150A v_rsq_clamp_f32_e32 v16, v5 ; 7E205905 v_mul_f32_e32 v5, v7, v7 ; 100A0F07 v_mad_f32 v5, v8, v8, v5 ; D2820005 04161108 s_buffer_load_dword s0, s[16:19], 0x13 ; C2001113 s_buffer_load_dword s1, s[16:19], 0x17 ; C2009117 v_mad_f32 v5, v9, v9, v5 ; D2820005 04161309 v_rsq_clamp_f32_e32 v17, v5 ; 7E225905 v_mul_f32_e32 v5, v13, v13 ; 100A1B0D v_mad_f32 v5, v14, v14, v5 ; D2820005 04161D0E v_mad_f32 v5, v15, v15, v5 ; D2820005 04161F0F v_rsq_clamp_f32_e32 v18, v5 ; 7E245905 v_cmp_ne_i32_e64 s[2:3], 0, s2 ; D10A0002 00000480 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 1.0 ; 7E0C02F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[2:3], s[2:3] ; BE822402 s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E s_cbranch_execz BB0_4 ; BF880000 v_interp_p1_f32 v4, v0, 0, 3, [m0] ; C8100C00 v_interp_p2_f32 v4, [v4], v1, 0, 3, [m0] ; C8110C01 v_interp_p1_f32 v5, v0, 1, 3, [m0] ; C8140D00 v_interp_p2_f32 v5, [v5], v1, 1, 3, [m0] ; C8150D01 v_interp_p1_f32 v6, v0, 2, 3, [m0] ; C8180E00 v_interp_p2_f32 v6, [v6], v1, 2, 3, [m0] ; C8190E01 s_buffer_load_dword s8, s[16:19], 0x19 ; C2041119 s_buffer_load_dword s9, s[16:19], 0x1b ; C204911B s_buffer_load_dword s10, s[16:19], 0x1c ; C205111C s_buffer_load_dword s11, s[16:19], 0x1d ; C205911D s_buffer_load_dword s12, s[16:19], 0x4 ; C2061104 s_buffer_load_dword s13, s[16:19], 0x5 ; C2069105 s_buffer_load_dword s14, s[16:19], 0x6 ; C2071106 s_buffer_load_dword s15, s[16:19], 0x7 ; C2079107 s_buffer_load_dword s20, s[16:19], 0x18 ; C20A1118 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 s_buffer_load_dword s21, s[16:19], 0x27 ; C20A9127 s_buffer_load_dword s22, s[16:19], 0x1f ; C20B111F s_buffer_load_dword s23, s[16:19], 0x20 ; C20B9120 s_buffer_load_dword s24, s[16:19], 0x21 ; C20C1121 s_buffer_load_dword s25, s[16:19], 0x23 ; C20C9123 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s10, v5 ; 10020A0A v_mul_f32_e32 v19, s11, v5 ; 10260A0B v_mad_f32 v1, s20, v4, v1 ; D2820001 04060814 v_mad_f32 v19, s8, v4, v19 ; D2820013 044E0808 s_buffer_load_dword s8, s[16:19], 0x24 ; C2041124 s_buffer_load_dword s10, s[16:19], 0x25 ; C2051125 v_mul_f32_e32 v20, s22, v5 ; 10280A16 v_mad_f32 v20, s9, v4, v20 ; D2820014 04520809 v_mad_f32 v1, s23, v6, v1 ; D2820001 04060C17 v_mad_f32 v20, s25, v6, v20 ; D2820014 04520C19 v_mad_f32 v20, s21, v0, v20 ; D2820014 04520015 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_movk_i32 s9, 0xf04 ; B0090F04 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_buffer_load_dword s9, s[16:19], s9 ; C2049009 v_rcp_f32_e32 v20, v20 ; 7E285514 v_mad_f32 v19, s24, v6, v19 ; D2820013 044E0C18 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s8, v0, v1 ; D2820001 04060008 v_mad_f32 v0, s10, v0, v19 ; D2820000 044E000A v_mul_f32_e32 v1, v1, v20 ; 10022901 v_mul_f32_e32 v0, v0, v20 ; 10002900 v_mov_b32_e32 v19, s15 ; 7E26020F v_mad_f32 v19, s12, v1, v19 ; D2820013 044E020C v_mov_b32_e32 v1, s14 ; 7E02020E v_mad_f32 v20, s13, v0, v1 ; D2820014 0406000D image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[28:35], s[20:23] ; F0800F00 00A71313 v_cmp_ne_i32_e64 s[8:9], 0, s9 ; D10A0008 00001280 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[8:9], s[8:9] ; BE882408 s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s10, s[16:19], 0x0 ; C2051100 s_buffer_load_dword s11, s[16:19], 0x1 ; C2059101 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v4, v4 ; 10000904 v_mad_f32 v0, v5, v5, v0 ; D2820000 04020B05 v_mad_f32 v0, v6, v6, v0 ; D2820000 04020D06 v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_add_f32_e32 v1, -1.0, v22 ; 06022CF3 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s10, v0 ; 0800000A v_mul_f32_e32 v0, s11, v0 ; 1000000B v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v0, v0, v1, 1.0 ; D2820000 03CA0300 s_or_b64 exec, exec, s[8:9] ; 88FE087E v_mul_f32_e32 v6, v0, v19 ; 100C2700 v_mul_f32_e32 v5, v0, v20 ; 100A2900 v_mul_f32_e32 v4, v0, v21 ; 10082B00 s_or_b64 exec, exec, s[2:3] ; 88FE027E v_mov_b32_e32 v1, s0 ; 7E020200 v_mov_b32_e32 v0, s1 ; 7E000201 v_min_f32_e32 v23, 0x7f7fffff, v16 ; 1E2E20FF 7F7FFFFF v_min_f32_e32 v16, 0x7f7fffff, v18 ; 1E2024FF 7F7FFFFF s_buffer_load_dword s8, s[16:19], 0x10 ; C2041110 s_buffer_load_dword s9, s[16:19], 0x11 ; C2049111 v_mul_f32_e32 v20, v23, v12 ; 10281917 s_buffer_load_dword s10, s[16:19], 0x12 ; C2051112 s_buffer_load_dword s1, s[16:19], 0x14 ; C2009114 s_buffer_load_dword s11, s[16:19], 0x15 ; C2059115 s_buffer_load_dword s12, s[16:19], 0x16 ; C2061116 s_buffer_load_dword s15, s[16:19], 0x28 ; C2079128 s_buffer_load_dword s14, s[16:19], 0x29 ; C2071129 s_buffer_load_dword s13, s[16:19], 0x2a ; C206912A s_buffer_load_dword s23, s[16:19], 0x2c ; C20B912C s_buffer_load_dword s24, s[16:19], 0x2d ; C20C112D s_buffer_load_dword s25, s[16:19], 0x2e ; C20C912E s_buffer_load_dword s30, s[16:19], 0x30 ; C20F1130 s_buffer_load_dword s29, s[16:19], 0x31 ; C20E9131 s_buffer_load_dword s28, s[16:19], 0x32 ; C20E1132 s_buffer_load_dword s27, s[16:19], 0x33 ; C20D9133 s_buffer_load_dword s26, s[16:19], 0x34 ; C20D1134 s_buffer_load_dword s31, s[16:19], 0x35 ; C20F9135 s_buffer_load_dword s21, s[16:19], 0x36 ; C20A9136 s_buffer_load_dword s20, s[16:19], 0x37 ; C20A1137 s_buffer_load_dword s22, s[16:19], 0x38 ; C20B1138 s_buffer_load_dword s0, s[16:19], 0x3c ; C200113C s_buffer_load_dword s2, s[16:19], 0x3d ; C201113D s_buffer_load_dword s3, s[16:19], 0x3e ; C201913E v_mul_f32_e32 v18, v16, v13 ; 10241B10 v_mul_f32_e32 v19, v16, v14 ; 10261D10 v_mul_f32_e32 v16, v16, v15 ; 10201F10 v_mul_f32_e32 v21, v23, v11 ; 102A1717 v_mul_f32_e32 v22, v20, v18 ; 102C2514 v_mad_f32 v24, v19, v21, v22 ; D2820018 045A2B13 v_mul_f32_e32 v22, v23, v10 ; 102C1517 v_mad_f32 v24, v16, v22, v24 ; D2820018 04622D10 v_mul_f32_e32 v25, v18, v24 ; 10323112 v_mad_f32 v25, v24, v18, v25 ; D2820019 04662518 v_mad_f32 v12, -v12, v23, v25 ; D282000C 24662F0C v_mul_f32_e32 v25, v19, v24 ; 10323113 s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508 s_load_dwordx4 s[44:47], s[4:5], 0xc ; C096050C s_load_dwordx4 s[48:51], s[4:5], 0x10 ; C0980510 s_load_dwordx4 s[16:19], s[4:5], 0x14 ; C0880514 s_load_dwordx8 s[52:59], s[6:7], 0x10 ; C0DA0710 s_load_dwordx8 s[60:67], s[6:7], 0x18 ; C0DE0718 s_load_dwordx8 s[68:75], s[6:7], 0x20 ; C0E20720 s_load_dwordx8 s[32:39], s[6:7], 0x28 ; C0D00728 v_mad_f32 v25, v24, v19, v25 ; D2820019 04662718 v_mad_f32 v11, -v11, v23, v25 ; D282000B 24662F0B v_mul_f32_e32 v25, v16, v24 ; 10323110 v_mad_f32 v24, v24, v16, v25 ; D2820018 04662118 v_mad_f32 v10, -v10, v23, v24 ; D282000A 24622F0A v_min_f32_e32 v23, 0x7f7fffff, v17 ; 1E2E22FF 7F7FFFFF v_mul_f32_e32 v17, v23, v7 ; 10220F17 v_mul_f32_e32 v8, v23, v8 ; 10101117 v_mul_f32_e32 v7, v23, v9 ; 100E1317 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:24], 6, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[52:59], s[40:43] ; F0800600 014D1702 image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[60:67], s[44:47] ; F0800700 016F1902 image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[68:75], s[48:51] ; F0800700 01911C02 v_mul_f32_e32 v9, v20, v13 ; 10121B14 v_mad_f32 v9, v14, v21, v9 ; D2820009 04262B0E v_mad_f32 v9, v15, v22, v9 ; D2820009 04262D0F v_max_f32_e32 v9, 0, v9 ; 20121280 v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mad_f32 v13, v11, v14, v13 ; D282000D 04361D0B v_mad_f32 v13, v10, v15, v13 ; D282000D 04361F0A v_max_f32_e32 v13, 0, v13 ; 201A1A80 v_min_f32_e32 v13, 0x3f4ccccd, v13 ; 1E1A1AFF 3F4CCCCD v_mul_f32_e32 v13, s31, v13 ; 101A1A1F s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v13, v24, v13 ; 101A1B18 v_mov_b32_e32 v14, 0x3f170a3d ; 7E1C02FF 3F170A3D s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v15, 0x3e99999a, v25 ; 101E32FF 3E99999A v_mad_f32 v15, v26, v14, v15 ; D282000F 043E1D1A v_mov_b32_e32 v20, 0x3de147ae ; 7E2802FF 3DE147AE v_mad_f32 v15, v27, v20, v15 ; D282000F 043E291B v_sub_f32_e64 v21, 1.0, s26 ; D2080015 000034F2 v_mul_f32_e32 v22, v25, v21 ; 102C2B19 v_mad_f32 v22, s26, v15, v22 ; D2820016 045A1E1A v_mul_f32_e32 v31, v26, v21 ; 103E2B1A v_mad_f32 v31, s26, v15, v31 ; D282001F 047E1E1A v_mul_f32_e32 v32, v27, v21 ; 10402B1B v_mad_f32 v32, s26, v15, v32 ; D2820020 04821E1A v_mad_f32 v22, v21, v25, v22 ; D2820016 045A3315 v_mad_f32 v31, v21, v26, v31 ; D282001F 047E3515 v_mad_f32 v21, v21, v27, v32 ; D2820015 04823715 v_sub_f32_e32 v32, 1.0, v9 ; 084012F2 v_mul_f32_e64 v33, |v32|, |v32| ; D2100321 00024120 v_mul_f32_e64 v33, v33, |v32| ; D2100221 00024121 v_mul_f32_e32 v33, s30, v33 ; 1042421E v_mul_f32_e32 v24, v24, v33 ; 10304318 v_mul_f32_e32 v33, v25, v24 ; 10423119 v_mul_f32_e32 v34, v26, v24 ; 1044311A v_mul_f32_e32 v24, v27, v24 ; 1030311B v_sub_f32_e64 v35, 1.0, s29 ; D2080023 00003AF2 v_mul_f32_e32 v25, v25, v35 ; 10324719 v_mul_f32_e32 v26, v26, v35 ; 1034471A v_mul_f32_e32 v27, v27, v35 ; 1036471B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v25, s29, v28, v25 ; D2820019 0466381D v_mad_f32 v26, s29, v29, v26 ; D282001A 046A3A1D v_mad_f32 v27, s29, v30, v27 ; D282001B 046E3C1D v_mul_f32_e32 v28, 0x3e99999a, v25 ; 103832FF 3E99999A v_mad_f32 v28, v26, v14, v28 ; D282001C 04721D1A v_mad_f32 v28, v27, v20, v28 ; D282001C 0472291B v_sub_f32_e64 v29, 1.0, s28 ; D208001D 000038F2 v_mul_f32_e32 v30, v25, v29 ; 103C3B19 v_mad_f32 v30, s28, v28, v30 ; D282001E 047A381C v_mul_f32_e32 v35, v26, v29 ; 10463B1A v_mad_f32 v35, s28, v28, v35 ; D2820023 048E381C v_mul_f32_e32 v29, v27, v29 ; 103A3B1B v_mad_f32 v28, s28, v28, v29 ; D282001C 0476381C v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_mov_b32_e32 v29, 0x7fffffff ; 7E3A02FF 7FFFFFFF v_and_b32_e32 v36, v9, v29 ; 36483B09 v_log_f32_e32 v36, v36 ; 7E484F24 v_mov_b32_e32 v37, 0xb58637bd ; 7E4A02FF B58637BD v_add_f32_e32 v9, v37, v9 ; 06121325 v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_mul_legacy_f32_e32 v9, s27, v36 ; 0E12481B v_exp_f32_e32 v9, v9 ; 7E124B09 v_cndmask_b32_e64 v9, v9, 0, vcc ; D2000009 01A90109 v_mul_f32_e32 v23, v9, v23 ; 102E2F09 v_mul_f32_e32 v36, s23, v9 ; 10481217 v_mad_f32 v30, v30, v36, -v25 ; D282001E 8466491E v_mul_f32_e32 v36, s24, v9 ; 10481218 v_mad_f32 v35, v35, v36, -v26 ; D2820023 846A4923 v_mul_f32_e32 v9, s25, v9 ; 10121219 v_mad_f32 v9, v28, v9, -v27 ; D2820009 846E131C v_mad_f32 v25, v23, v30, v25 ; D2820019 04663D17 v_mad_f32 v26, v23, v35, v26 ; D282001A 046A4717 v_mad_f32 v9, v23, v9, v27 ; D2820009 046E1317 v_mad_f32 v22, s26, v15, v22 ; D2820016 045A1E1A v_mad_f32 v23, s26, v15, v31 ; D2820017 047E1E1A v_mad_f32 v15, s26, v15, v21 ; D282000F 04561E1A v_mov_b32_e32 v21, 0x358637bd ; 7E2A02FF 358637BD v_max_f32_e64 v22, |v22|, v21 ; D2200116 00022B16 v_max_f32_e64 v23, |v23|, v21 ; D2200117 00022B17 v_max_f32_e64 v15, |v15|, v21 ; D220010F 00022B0F v_mad_f32 v21, v22, v22, -v25 ; D2820015 84662D16 v_mad_f32 v21, v13, v21, v25 ; D2820015 04662B0D v_mad_f32 v22, v23, v23, -v26 ; D2820016 846A2F17 v_mad_f32 v22, v13, v22, v26 ; D2820016 046A2D0D v_mad_f32 v15, v15, v15, -v9 ; D282000F 84261F0F v_mad_f32 v9, v13, v15, v9 ; D2820009 04261F0D image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[16:19] ; F0800700 00881902 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, s22, v25 ; 10043216 v_mul_f32_e32 v3, s22, v26 ; 10063416 v_mul_f32_e32 v13, s22, v27 ; 101A3616 v_add_f32_e64 v15, |v32|, v37 ; D206010F 00024B20 v_cmp_gt_f32_e32 vcc, 0, v15 ; 7C081E80 v_cndmask_b32_e64 v15, v33, 0, vcc ; D200000F 01A90121 v_add_f32_e32 v15, v21, v15 ; 061E1F15 v_cndmask_b32_e64 v21, v34, 0, vcc ; D2000015 01A90122 v_add_f32_e32 v21, v22, v21 ; 062A2B16 v_cndmask_b32_e64 v22, v24, 0, vcc ; D2000016 01A90118 v_add_f32_e32 v9, v9, v22 ; 06122D09 v_mul_f32_e32 v22, 0x3e99999a, v15 ; 102C1EFF 3E99999A v_mad_f32 v14, v21, v14, v22 ; D282000E 045A1D15 v_mad_f32 v14, v9, v20, v14 ; D282000E 043A2909 v_sub_f32_e64 v20, 1.0, s21 ; D2080014 00002AF2 v_mul_f32_e32 v15, v15, v20 ; 101E290F v_mul_f32_e32 v21, v21, v20 ; 102A2915 v_mul_f32_e32 v9, v9, v20 ; 10122909 v_mad_f32 v15, s21, v14, v15 ; D282000F 043E1C15 v_mad_f32 v20, s21, v14, v21 ; D2820014 04561C15 v_mad_f32 v9, s21, v14, v9 ; D2820009 04261C15 v_mul_f32_e32 v14, s20, v15 ; 101C1E14 v_sub_f32_e64 v15, 1.0, s15 ; D208000F 00001EF2 v_mul_f32_e32 v14, v14, v15 ; 101C1F0E v_mul_f32_e32 v15, s20, v20 ; 101E2814 v_sub_f32_e64 v20, 1.0, s14 ; D2080014 00001CF2 v_mul_f32_e32 v15, v15, v20 ; 101E290F v_mul_f32_e32 v9, s20, v9 ; 10121214 v_sub_f32_e64 v20, 1.0, s13 ; D2080014 00001AF2 v_mul_f32_e32 v9, v9, v20 ; 10122909 v_mad_f32 v14, v1, v14, s8 ; D282000E 00221D01 v_mad_f32 v15, v15, v1, s9 ; D282000F 0026030F v_mad_f32 v1, v9, v1, s10 ; D2820001 002A0309 v_mul_f32_e32 v2, s20, v2 ; 10040414 v_mul_f32_e32 v3, s20, v3 ; 10060614 v_mul_f32_e32 v9, s20, v13 ; 10121A14 v_mad_f32 v2, v0, v2, s1 ; D2820002 00060500 v_mad_f32 v3, v3, v0, s11 ; D2820003 002E0103 v_mad_f32 v0, v9, v0, s12 ; D2820000 00320109 v_mul_f32_e32 v9, v17, v18 ; 10122511 v_mad_f32 v9, v19, v8, v9 ; D2820009 04261113 v_mul_f32_e32 v12, v17, v12 ; 10181911 v_mad_f32 v8, v11, v8, v12 ; D2820008 0432110B v_mad_f32 v9, v16, v7, v9 ; D2820009 04260F10 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_mad_f32 v7, v10, v7, v8 ; D2820007 04220F0A v_add_f32_e32 v8, v37, v9 ; 06101325 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_and_b32_e32 v10, v7, v29 ; 36143B07 v_log_f32_e32 v10, v10 ; 7E144F0A v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_cndmask_b32_e64 v8, v9, 0, vcc ; D2000008 01A90109 v_add_f32_e32 v7, v37, v7 ; 060E0F25 v_mul_legacy_f32_e32 v9, 0x41700000, v10 ; 0E1214FF 41700000 v_exp_f32_e32 v9, v9 ; 7E124B09 v_mul_f32_e32 v9, 0x3f6a46ad, v9 ; 101212FF 3F6A46AD v_mul_f32_e32 v2, v9, v2 ; 10040509 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mad_f32 v2, v14, v8, v2 ; D2820002 040A110E v_mad_f32 v3, v15, v8, v3 ; D2820003 040E110F v_mad_f32 v0, v1, v8, v0 ; D2820000 04021101 v_mul_f32_e32 v1, v2, v6 ; 10020D02 v_mul_f32_e32 v2, v3, v5 ; 10040B03 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v2, s2, v2 ; 10040402 v_mul_f32_e32 v0, s3, v0 ; 10000003 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 40 Code Size: 1964 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..6] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[2], IN[2] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[2], TEMP[0].xxxx 8: TEX TEMP[3], IN[1].wzzw, SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: TEX TEMP[3], IN[0], SAMP[4], 2D 18: ADD TEMP[1].w, TEMP[3].xxxx, CONST[13].xxxx 19: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[13].yyyy 20: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 21: POW TEMP[3].x, |TEMP[1].wwww|, CONST[13].zzzz 22: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].xxxx 23: UIF CONST[240].xxxx :45 24: MUL TEMP[5].xyz, CONST[7].xyww, IN[4].yyyy 25: MAD TEMP[5].xyz, CONST[6].xyww, IN[4].xxxx, TEMP[5] 26: MAD TEMP[5].xyz, CONST[8].xyww, IN[4].zzzz, TEMP[5] 27: MAD TEMP[5].xyz, CONST[9].xyww, IN[4].wwww, TEMP[5] 28: RCP TEMP[1].w, TEMP[5].zzzz 29: MUL TEMP[5].xy, TEMP[1].wwww, TEMP[5] 30: MAD TEMP[5].xy, TEMP[5], CONST[1], CONST[1].wzzw 31: TEX TEMP[5], TEMP[5], SAMP[0], 2D 32: UIF CONST[240].yyyy :0 33: DP3 TEMP[1].w, IN[4], IN[4] 34: RSQ TEMP[0], |TEMP[1].wwww| 35: MIN TEMP[1].w, IMM[1].wwww, TEMP[0] 36: RCP TEMP[1].w, TEMP[1].wwww 37: ADD TEMP[1].w, -TEMP[1].wwww, CONST[0].xxxx 38: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[0].yyyy 39: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 40: LRP TEMP[6].xyz, TEMP[1].wwww, TEMP[5].wwww, TEMP[3].zzzz 41: ELSE :43 42: MOV TEMP[6].xyz, TEMP[3].zzzz 43: ENDIF 44: MUL TEMP[3].xyz, TEMP[5], TEMP[6] 45: ENDIF 46: MOV TEMP[5].y, IMM[0].yyyy 47: ADD TEMP[5].xyz, -TEMP[5].yyyy, -CONST[10] 48: TEX TEMP[6], IN[1], SAMP[2], 2D 49: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 50: MAD TEMP[5].xyz, TEMP[5], CONST[4].wwww, CONST[4] 51: TEX TEMP[6], IN[1], SAMP[3], 2D 52: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 53: DP3_SAT TEMP[1].w, TEMP[4], TEMP[2] 54: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 55: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 56: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].zzzz 57: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[11].xxxx 58: MOV TEMP[1].x, CONST[11].xxxx 59: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 60: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 61: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 62: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[5] 63: CMP TEMP[2].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[2] 64: MUL TEMP[1].xzw, TEMP[6].xyyz, TEMP[1].xxxx 65: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[0].wwww, TEMP[1].xzww 66: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 67: MUL TEMP[1].xyz, TEMP[3], TEMP[1] 68: MUL OUT[0].xyz, TEMP[1], CONST[12] 69: MOV OUT[0].w, IMM[0].wwww 70: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %45 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %46 = bitcast <8 x i32> addrspace(2)* %45 to <32 x i8> addrspace(2)* %47 = load <32 x i8>, <32 x i8> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %49 = bitcast <4 x i32> addrspace(2)* %48 to <16 x i8> addrspace(2)* %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %52 = bitcast <8 x i32> addrspace(2)* %51 to <32 x i8> addrspace(2)* %53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %55 = bitcast <4 x i32> addrspace(2)* %54 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %85 = fmul float %78, %78 %86 = fmul float %79, %79 %87 = fadd float %86, %85 %88 = fmul float %80, %80 %89 = fadd float %87, %88 %90 = call float @llvm.AMDGPU.rsq.clamped.f32(float %89) %91 = call float @llvm.minnum.f32(float %90, float 0x47EFFFFFE0000000) %92 = fmul float %78, %91 %93 = fmul float %79, %91 %94 = fmul float %80, %91 %95 = fmul float %75, %75 %96 = fmul float %76, %76 %97 = fadd float %96, %95 %98 = fmul float %77, %77 %99 = fadd float %97, %98 %100 = call float @llvm.AMDGPU.rsq.clamped.f32(float %99) %101 = call float @llvm.minnum.f32(float %100, float 0x47EFFFFFE0000000) %102 = fmul float %75, %101 %103 = fmul float %76, %101 %104 = fmul float %77, %101 %105 = bitcast float %74 to i32 %106 = bitcast float %73 to i32 %107 = insertelement <2 x i32> undef, i32 %105, i32 0 %108 = insertelement <2 x i32> %107, i32 %106, i32 1 %109 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %108, <32 x i8> %47, <16 x i8> %50, i32 2) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = fmul float %110, 2.000000e+00 %114 = fadd float %113, -1.000000e+00 %115 = fmul float %111, 2.000000e+00 %116 = fadd float %115, -1.000000e+00 %117 = fmul float %112, 2.000000e+00 %118 = fadd float %117, -1.000000e+00 %119 = fmul float %114, %114 %120 = fmul float %116, %116 %121 = fadd float %120, %119 %122 = fmul float %118, %118 %123 = fadd float %121, %122 %124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123) %125 = call float @llvm.minnum.f32(float %124, float 0x47EFFFFFE0000000) %126 = fmul float %114, %125 %127 = fmul float %116, %125 %128 = fmul float %118, %125 %129 = fmul float %126, %92 %130 = fmul float %127, %93 %131 = fadd float %130, %129 %132 = fmul float %128, %94 %133 = fadd float %131, %132 %134 = fmul float %133, %126 %135 = fmul float %133, %127 %136 = fmul float %133, %128 %137 = fmul float %134, 2.000000e+00 %138 = fsub float %137, %92 %139 = fmul float %135, 2.000000e+00 %140 = fsub float %139, %93 %141 = fmul float %136, 2.000000e+00 %142 = fsub float %141, %94 %143 = bitcast float %69 to i32 %144 = bitcast float %70 to i32 %145 = insertelement <2 x i32> undef, i32 %143, i32 0 %146 = insertelement <2 x i32> %145, i32 %144, i32 1 %147 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %65, <16 x i8> %68, i32 2) %148 = extractelement <4 x float> %147, i32 0 %149 = fadd float %148, %41 %150 = fmul float %149, %42 %151 = call float @llvm.AMDIL.clamp.(float %150, float 0.000000e+00, float 1.000000e+00) %152 = fadd float %151, 0xBEB0C6F7A0000000 %153 = call float @fabs(float %151) %154 = call float @llvm.pow.f32(float %153, float %43) %155 = call float @llvm.AMDGPU.cndlt(float %152, float 0.000000e+00, float %154) %156 = call float @llvm.AMDGPU.cndlt(float %152, float 0.000000e+00, float %154) %157 = call float @llvm.AMDGPU.cndlt(float %152, float 0.000000e+00, float %154) %158 = bitcast float %44 to i32 %159 = icmp eq i32 %158, 0 br i1 %159, label %ENDIF, label %IF IF: ; preds = %main_body %160 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %161 = load <16 x i8>, <16 x i8> addrspace(2)* %160, align 16, !tbaa !0 %162 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %163 = load <32 x i8>, <32 x i8> addrspace(2)* %162, align 32, !tbaa !0 %164 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %165 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %166 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %167 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %168 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %169 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %170 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %171 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %172 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %173 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %174 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %175 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %176 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %177 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %178 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %179 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %180 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %181 = fmul float %173, %82 %182 = fmul float %172, %82 %183 = fmul float %171, %82 %184 = fmul float %176, %81 %185 = fadd float %184, %181 %186 = fmul float %175, %81 %187 = fadd float %186, %182 %188 = fmul float %174, %81 %189 = fadd float %188, %183 %190 = fmul float %170, %83 %191 = fadd float %190, %185 %192 = fmul float %169, %83 %193 = fadd float %192, %187 %194 = fmul float %168, %83 %195 = fadd float %194, %189 %196 = fmul float %167, %84 %197 = fadd float %196, %191 %198 = fmul float %166, %84 %199 = fadd float %198, %193 %200 = fmul float %165, %84 %201 = fadd float %200, %195 %202 = fdiv float 1.000000e+00, %201 %203 = fmul float %202, %197 %204 = fmul float %202, %199 %205 = fmul float %203, %180 %206 = fadd float %205, %177 %207 = fmul float %204, %179 %208 = fadd float %207, %178 %209 = bitcast float %206 to i32 %210 = bitcast float %208 to i32 %211 = insertelement <2 x i32> undef, i32 %209, i32 0 %212 = insertelement <2 x i32> %211, i32 %210, i32 1 %213 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %212, <32 x i8> %163, <16 x i8> %161, i32 2) %214 = extractelement <4 x float> %213, i32 0 %215 = extractelement <4 x float> %213, i32 1 %216 = extractelement <4 x float> %213, i32 2 %217 = extractelement <4 x float> %213, i32 3 %218 = bitcast float %164 to i32 %219 = icmp eq i32 %218, 0 br i1 %219, label %ENDIF28, label %IF29 ENDIF: ; preds = %main_body, %ENDIF28 %temp12.0 = phi float [ %314, %ENDIF28 ], [ %155, %main_body ] %temp13.0 = phi float [ %315, %ENDIF28 ], [ %156, %main_body ] %temp14.0 = phi float [ %316, %ENDIF28 ], [ %157, %main_body ] %220 = fsub float 1.000000e+00, %34 %221 = fsub float 1.000000e+00, %35 %222 = fsub float 1.000000e+00, %36 %223 = bitcast float %71 to i32 %224 = bitcast float %72 to i32 %225 = insertelement <2 x i32> undef, i32 %223, i32 0 %226 = insertelement <2 x i32> %225, i32 %224, i32 1 %227 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %226, <32 x i8> %53, <16 x i8> %56, i32 2) %228 = extractelement <4 x float> %227, i32 0 %229 = extractelement <4 x float> %227, i32 1 %230 = extractelement <4 x float> %227, i32 2 %231 = fmul float %220, %228 %232 = fmul float %221, %229 %233 = fmul float %222, %230 %234 = fmul float %231, %29 %235 = fadd float %234, %26 %236 = fmul float %232, %29 %237 = fadd float %236, %27 %238 = fmul float %233, %29 %239 = fadd float %238, %28 %240 = bitcast float %71 to i32 %241 = bitcast float %72 to i32 %242 = insertelement <2 x i32> undef, i32 %240, i32 0 %243 = insertelement <2 x i32> %242, i32 %241, i32 1 %244 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %243, <32 x i8> %59, <16 x i8> %62, i32 2) %245 = extractelement <4 x float> %244, i32 0 %246 = extractelement <4 x float> %244, i32 1 %247 = extractelement <4 x float> %244, i32 2 %248 = fmul float %245, %33 %249 = fadd float %248, %30 %250 = fmul float %246, %33 %251 = fadd float %250, %31 %252 = fmul float %247, %33 %253 = fadd float %252, %32 %254 = fmul float %126, %102 %255 = fmul float %127, %103 %256 = fadd float %255, %254 %257 = fmul float %128, %104 %258 = fadd float %256, %257 %259 = call float @llvm.AMDIL.clamp.(float %258, float 0.000000e+00, float 1.000000e+00) %260 = fadd float %259, 0xBEB0C6F7A0000000 %261 = fmul float %138, %102 %262 = fmul float %140, %103 %263 = fadd float %262, %261 %264 = fmul float %142, %104 %265 = fadd float %263, %264 %266 = call float @llvm.AMDIL.clamp.(float %265, float 0.000000e+00, float 1.000000e+00) %267 = fadd float %266, 0xBEB0C6F7A0000000 %268 = call float @fabs(float %266) %269 = call float @llvm.pow.f32(float %268, float %37) %270 = fadd float %37, 8.000000e+00 %271 = fmul float %270, %269 %272 = fmul float %271, 0x3FA45F3060000000 %273 = fmul float %259, %235 %274 = fmul float %259, %237 %275 = fmul float %259, %239 %276 = call float @llvm.AMDGPU.cndlt(float %260, float 0.000000e+00, float %273) %277 = call float @llvm.AMDGPU.cndlt(float %260, float 0.000000e+00, float %274) %278 = call float @llvm.AMDGPU.cndlt(float %260, float 0.000000e+00, float %275) %279 = fmul float %249, %272 %280 = fmul float %251, %272 %281 = fmul float %253, %272 %282 = call float @llvm.AMDGPU.cndlt(float %267, float 0.000000e+00, float %279) %283 = call float @llvm.AMDGPU.cndlt(float %267, float 0.000000e+00, float %280) %284 = call float @llvm.AMDGPU.cndlt(float %267, float 0.000000e+00, float %281) %285 = fadd float %282, %276 %286 = fadd float %283, %277 %287 = fadd float %284, %278 %288 = fmul float %temp12.0, %285 %289 = fmul float %temp13.0, %286 %290 = fmul float %temp14.0, %287 %291 = fmul float %288, %38 %292 = fmul float %289, %39 %293 = fmul float %290, %40 %294 = call i32 @llvm.SI.packf16(float %291, float %292) %295 = bitcast i32 %294 to float %296 = call i32 @llvm.SI.packf16(float %293, float 0.000000e+00) %297 = bitcast i32 %296 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %295, float %297, float %295, float %297) ret void IF29: ; preds = %IF %298 = fmul float %81, %81 %299 = fmul float %82, %82 %300 = fadd float %299, %298 %301 = fmul float %83, %83 %302 = fadd float %300, %301 %303 = call float @fabs(float %302) %304 = call float @llvm.AMDGPU.rsq.clamped.f32(float %303) %305 = call float @llvm.minnum.f32(float %304, float 0x47EFFFFFE0000000) %306 = fdiv float 1.000000e+00, %305 %307 = fsub float %24, %306 %308 = fmul float %307, %25 %309 = call float @llvm.AMDIL.clamp.(float %308, float 0.000000e+00, float 1.000000e+00) %310 = fmul float %309, %309 %311 = call float @llvm.AMDGPU.lrp(float %310, float %217, float %157) %312 = call float @llvm.AMDGPU.lrp(float %310, float %217, float %157) %313 = call float @llvm.AMDGPU.lrp(float %310, float %217, float %157) br label %ENDIF28 ENDIF28: ; preds = %IF, %IF29 %temp24.0 = phi float [ %311, %IF29 ], [ %157, %IF ] %temp25.0 = phi float [ %312, %IF29 ], [ %157, %IF ] %temp26.0 = phi float [ %313, %IF29 ], [ %157, %IF ] %314 = fmul float %214, %temp24.0 %315 = fmul float %215, %temp25.0 %316 = fmul float %216, %temp26.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_movk_i32 s0, 0xf00 ; B0000F00 v_mov_b32_e32 v10, 0xb58637bd ; 7E1402FF B58637BD s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100 v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v14, v0, 2, 1, [m0] ; C8380600 v_interp_p2_f32 v14, [v14], v1, 2, 1, [m0] ; C8390601 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[8:11], s0 ; C2010800 v_interp_p1_f32 v13, v0, 3, 1, [m0] ; C8340700 v_interp_p2_f32 v13, [v13], v1, 3, 1, [m0] ; C8350701 s_buffer_load_dword s0, s[8:11], 0x13 ; C2000913 s_buffer_load_dword s1, s[8:11], 0x17 ; C2008917 s_buffer_load_dword s3, s[8:11], 0x34 ; C2018934 s_buffer_load_dword s36, s[8:11], 0x35 ; C2120935 s_buffer_load_dword s37, s[8:11], 0x36 ; C2128936 v_interp_p1_f32 v4, v0, 0, 2, [m0] ; C8100800 v_interp_p2_f32 v4, [v4], v1, 0, 2, [m0] ; C8110801 v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 v_interp_p1_f32 v6, v0, 2, 2, [m0] ; C8180A00 v_interp_p2_f32 v6, [v6], v1, 2, 2, [m0] ; C8190A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v8, v0, 1, 3, [m0] ; C8200D00 v_interp_p2_f32 v8, [v8], v1, 1, 3, [m0] ; C8210D01 v_interp_p1_f32 v7, v0, 2, 3, [m0] ; C81C0E00 v_interp_p2_f32 v7, [v7], v1, 2, 3, [m0] ; C81D0E01 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx4 s[12:15], s[4:5], 0x10 ; C0860510 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_load_dwordx8 s[28:35], s[6:7], 0x20 ; C0CE0720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[20:27], s[16:19] ; F0800700 00850F0D image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[28:35], s[12:15] ; F0800100 00670B0B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v11, s3, v11 ; 06161603 v_mul_f32_e32 v11, s36, v11 ; 10161624 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_and_b32_e32 v12, 0x7fffffff, v11 ; 361816FF 7FFFFFFF v_log_f32_e32 v18, v12 ; 7E244F0C v_mad_f32 v14, 2.0, v15, -1.0 ; D282000E 03CE1EF4 v_mad_f32 v13, 2.0, v16, -1.0 ; D282000D 03CE20F4 v_mad_f32 v12, 2.0, v17, -1.0 ; D282000C 03CE22F4 v_add_f32_e32 v10, v11, v10 ; 0614150B v_mul_legacy_f32_e32 v16, s37, v18 ; 0E202425 v_mul_f32_e32 v11, v9, v9 ; 10161309 v_mad_f32 v11, v8, v8, v11 ; D282000B 042E1108 v_mad_f32 v11, v7, v7, v11 ; D282000B 042E0F07 v_rsq_clamp_f32_e32 v15, v11 ; 7E1E590B v_mul_f32_e32 v11, v4, v4 ; 10160904 v_mad_f32 v11, v5, v5, v11 ; D282000B 042E0B05 v_mad_f32 v11, v6, v6, v11 ; D282000B 042E0D06 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v17, v14, v14 ; 10221D0E v_mad_f32 v17, v13, v13, v17 ; D2820011 04461B0D v_mad_f32 v17, v12, v12, v17 ; D2820011 0446190C v_rsq_clamp_f32_e32 v18, v17 ; 7E245911 v_exp_f32_e32 v16, v16 ; 7E204B10 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v10, v16, 0, vcc ; D200000A 01A90110 v_cmp_ne_i32_e64 s[2:3], 0, s2 ; D10A0002 00000480 v_mov_b32_e32 v16, v10 ; 7E20030A v_mov_b32_e32 v17, v10 ; 7E22030A s_and_saveexec_b64 s[2:3], s[2:3] ; BE822402 s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E s_cbranch_execz BB0_4 ; BF880000 v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000 v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001 v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100 v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101 v_interp_p1_f32 v19, v0, 2, 4, [m0] ; C84C1200 v_interp_p2_f32 v19, [v19], v1, 2, 4, [m0] ; C84D1201 s_buffer_load_dword s12, s[8:11], 0x19 ; C2060919 s_buffer_load_dword s13, s[8:11], 0x1b ; C206891B s_buffer_load_dword s14, s[8:11], 0x1c ; C207091C s_buffer_load_dword s15, s[8:11], 0x1d ; C207891D s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904 s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905 s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906 s_buffer_load_dword s19, s[8:11], 0x7 ; C2098907 s_buffer_load_dword s20, s[8:11], 0x18 ; C20A0918 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 s_buffer_load_dword s21, s[8:11], 0x27 ; C20A8927 s_buffer_load_dword s22, s[8:11], 0x1f ; C20B091F s_buffer_load_dword s23, s[8:11], 0x20 ; C20B8920 s_buffer_load_dword s24, s[8:11], 0x21 ; C20C0921 s_buffer_load_dword s25, s[8:11], 0x23 ; C20C8923 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s14, v17 ; 1002220E v_mul_f32_e32 v20, s15, v17 ; 1028220F v_mad_f32 v1, s20, v16, v1 ; D2820001 04062014 v_mad_f32 v20, s12, v16, v20 ; D2820014 0452200C s_buffer_load_dword s12, s[8:11], 0x24 ; C2060924 s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925 v_mul_f32_e32 v21, s22, v17 ; 102A2216 v_mad_f32 v21, s13, v16, v21 ; D2820015 0456200D v_mad_f32 v1, s23, v19, v1 ; D2820001 04062617 v_mad_f32 v21, s25, v19, v21 ; D2820015 04562619 v_mad_f32 v21, s21, v0, v21 ; D2820015 04560015 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_movk_i32 s13, 0xf04 ; B00D0F04 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_buffer_load_dword s13, s[8:11], s13 ; C206880D v_rcp_f32_e32 v21, v21 ; 7E2A5515 v_mad_f32 v20, s24, v19, v20 ; D2820014 04522618 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s12, v0, v1 ; D2820001 0406000C v_mad_f32 v0, s14, v0, v20 ; D2820000 0452000E v_mul_f32_e32 v1, v1, v21 ; 10022B01 v_mul_f32_e32 v0, v0, v21 ; 10002B00 v_mov_b32_e32 v20, s19 ; 7E280213 v_mad_f32 v20, s16, v1, v20 ; D2820014 04520210 v_mov_b32_e32 v1, s18 ; 7E020212 v_mad_f32 v21, s17, v0, v1 ; D2820015 04060011 image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[28:35], s[20:23] ; F0800F00 00A71414 v_cmp_ne_i32_e64 s[12:13], 0, s13 ; D10A000C 00001A80 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s14, s[8:11], 0x0 ; C2070900 s_buffer_load_dword s15, s[8:11], 0x1 ; C2078901 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v16, v16 ; 10002110 v_mad_f32 v0, v17, v17, v0 ; D2820000 04022311 v_mad_f32 v0, v19, v19, v0 ; D2820000 04022713 v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s14, v0 ; 0800000E v_mul_f32_e32 v0, s15, v0 ; 1000000F v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v1, v0, v0 ; 10020100 v_mad_f32 v0, -v0, v0, 1.0 ; D2820000 23CA0100 v_mul_f32_e32 v0, v10, v0 ; 1000010A v_mad_f32 v10, v1, v23, v0 ; D282000A 04022F01 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mul_f32_e32 v17, v10, v20 ; 1022290A v_mul_f32_e32 v16, v10, v21 ; 10202B0A v_mul_f32_e32 v10, v10, v22 ; 10142D0A s_or_b64 exec, exec, s[2:3] ; 88FE027E v_min_f32_e32 v0, 0x7f7fffff, v18 ; 1E0024FF 7F7FFFFF s_buffer_load_dword s18, s[8:11], 0x10 ; C2090910 s_buffer_load_dword s19, s[8:11], 0x11 ; C2098911 s_buffer_load_dword s13, s[8:11], 0x12 ; C2068912 s_buffer_load_dword s14, s[8:11], 0x14 ; C2070914 s_buffer_load_dword s15, s[8:11], 0x15 ; C2078915 s_buffer_load_dword s16, s[8:11], 0x16 ; C2080916 s_buffer_load_dword s20, s[8:11], 0x28 ; C20A0928 s_buffer_load_dword s21, s[8:11], 0x29 ; C20A8929 s_buffer_load_dword s17, s[8:11], 0x2a ; C208892A s_buffer_load_dword s12, s[8:11], 0x2c ; C206092C s_buffer_load_dword s2, s[8:11], 0x30 ; C2010930 s_buffer_load_dword s3, s[8:11], 0x31 ; C2018931 s_buffer_load_dword s8, s[8:11], 0x32 ; C2040932 v_mul_f32_e32 v14, v0, v14 ; 101C1D00 v_mul_f32_e32 v13, v0, v13 ; 101A1B00 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_min_f32_e32 v1, 0x7f7fffff, v15 ; 1E021EFF 7F7FFFFF v_mul_f32_e32 v12, v1, v9 ; 10181301 v_mul_f32_e32 v12, v12, v14 ; 10181D0C v_mul_f32_e32 v15, v1, v8 ; 101E1101 v_mad_f32 v12, v13, v15, v12 ; D282000C 04321F0D v_mul_f32_e32 v15, v1, v7 ; 101E0F01 v_mad_f32 v12, v0, v15, v12 ; D282000C 04321F00 v_mul_f32_e32 v15, v14, v12 ; 101E190E v_mad_f32 v15, v12, v14, v15 ; D282000F 043E1D0C v_mad_f32 v9, -v9, v1, v15 ; D2820009 243E0309 v_mul_f32_e32 v15, v13, v12 ; 101E190D s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C s_load_dwordx8 s[32:39], s[6:7], 0x10 ; C0D00710 s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718 v_mad_f32 v15, v12, v13, v15 ; D282000F 043E1B0C v_mad_f32 v8, -v8, v1, v15 ; D2820008 243E0308 v_mul_f32_e32 v15, v0, v12 ; 101E1900 v_mad_f32 v12, v12, v0, v15 ; D282000C 043E010C v_mad_f32 v1, -v7, v1, v12 ; D2820001 24320307 v_min_f32_e32 v12, 0x7f7fffff, v11 ; 1E1816FF 7F7FFFFF v_mul_f32_e32 v7, v12, v4 ; 100E090C v_mul_f32_e32 v11, v12, v5 ; 10160B0C v_mul_f32_e32 v4, v12, v6 ; 10080D0C v_mov_b32_e32 v5, s0 ; 7E0A0200 v_mov_b32_e32 v6, s1 ; 7E0C0201 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[24:27] ; F0800700 00C81202 image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[28:31] ; F0800700 00EA1502 v_sub_f32_e64 v2, 1.0, s20 ; D2080002 000028F2 v_sub_f32_e64 v3, 1.0, s21 ; D2080003 00002AF2 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v2, v18, v2 ; 10040512 v_mad_f32 v2, v5, v2, s18 ; D2820002 004A0505 v_mul_f32_e32 v3, v19, v3 ; 10060713 v_mad_f32 v3, v3, v5, s19 ; D2820003 004E0B03 v_mul_f32_e32 v12, v7, v14 ; 10181D07 v_mad_f32 v12, v13, v11, v12 ; D282000C 0432170D v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mad_f32 v7, v8, v11, v7 ; D2820007 041E1708 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v6, v21, s14 ; D2820008 003A2B06 v_mad_f32 v9, v22, v6, s15 ; D2820009 003E0D16 v_mad_f32 v6, v23, v6, s16 ; D2820006 00420D17 v_sub_f32_e64 v11, 1.0, s17 ; D208000B 000022F2 v_mul_f32_e32 v11, v20, v11 ; 10161714 v_mad_f32 v5, v11, v5, s13 ; D2820005 00360B0B v_mad_f32 v0, v0, v4, v12 ; D2820000 04320900 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mad_f32 v1, v1, v4, v7 ; D2820001 041E0901 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_and_b32_e32 v4, 0x7fffffff, v1 ; 360802FF 7FFFFFFF v_log_f32_e32 v4, v4 ; 7E084F04 v_mov_b32_e32 v7, 0xb58637bd ; 7E0E02FF B58637BD v_add_f32_e32 v11, v7, v0 ; 06160107 v_add_f32_e32 v1, v7, v1 ; 06020307 v_mul_legacy_f32_e32 v4, s12, v4 ; 0E08080C v_exp_f32_e32 v4, v4 ; 7E084B04 v_mov_b32_e32 v7, 0x41000000 ; 7E0E02FF 41000000 v_add_f32_e32 v7, s12, v7 ; 060E0E0C v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mul_f32_e32 v4, 0x3d22f983, v4 ; 100808FF 3D22F983 v_mul_f32_e32 v2, v2, v0 ; 10040102 v_mul_f32_e32 v3, v3, v0 ; 10060103 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v5, v4, v8 ; 100A1104 v_mul_f32_e32 v7, v4, v9 ; 100E1304 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v5, 0, vcc ; D2000001 01A90105 v_cndmask_b32_e64 v5, v7, 0, vcc ; D2000005 01A90107 v_cndmask_b32_e64 v4, v4, 0, vcc ; D2000004 01A90104 v_add_f32_e32 v1, v2, v1 ; 06020302 v_add_f32_e32 v2, v3, v5 ; 06040B03 v_add_f32_e32 v0, v0, v4 ; 06000900 v_mul_f32_e32 v1, v1, v17 ; 10022301 v_mul_f32_e32 v2, v2, v16 ; 10042102 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mul_f32_e32 v1, s2, v1 ; 10020202 v_mul_f32_e32 v2, s3, v2 ; 10040403 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 24 Code Size: 1336 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL IN[3], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..9] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, -0.0000} IMM[1] FLT32 { 0.3000, 0.5900, 0.1100, 0.0000} IMM[2] FLT32 { 15.0000, 0.9151, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[2], IN[2] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[2], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: TEX TEMP[3], IN[0], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[2].wwww, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[5].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[5].xyz, TEMP[5], IMM[0].xxxx, -TEMP[1] 17: UIF CONST[240].xxxx :0 18: MUL TEMP[6].xyz, CONST[7].xyww, IN[3].yyyy 19: MAD TEMP[6].xyz, CONST[6].xyww, IN[3].xxxx, TEMP[6] 20: MAD TEMP[6].xyz, CONST[8].xyww, IN[3].zzzz, TEMP[6] 21: MAD TEMP[6].xyz, CONST[9].xyww, IN[3].wwww, TEMP[6] 22: RCP TEMP[1].w, TEMP[6].zzzz 23: MUL TEMP[6].xy, TEMP[1].wwww, TEMP[6] 24: MAD TEMP[6].xy, TEMP[6], CONST[1], CONST[1].wzzw 25: TEX TEMP[6], TEMP[6], SAMP[0], 2D 26: UIF CONST[240].yyyy :0 27: DP3 TEMP[1].w, IN[3], IN[3] 28: RSQ TEMP[0], |TEMP[1].wwww| 29: MIN TEMP[1].w, IMM[2].wwww, TEMP[0] 30: RCP TEMP[1].w, TEMP[1].wwww 31: ADD TEMP[1].w, -TEMP[1].wwww, CONST[0].xxxx 32: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[0].yyyy 33: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 34: ADD TEMP[2].w, TEMP[6].wwww, IMM[0].yyyy 35: MAD TEMP[7].xyz, TEMP[1].wwww, TEMP[2].wwww, IMM[0].zzzz 36: ELSE :38 37: MOV TEMP[7].xyz, IMM[0].zzzz 38: ENDIF 39: MUL TEMP[6].xyz, TEMP[6], TEMP[7] 40: ELSE :42 41: MOV TEMP[6].xyz, IMM[0].zzzz 42: ENDIF 43: MOV TEMP[7].z, IMM[0].zzzz 44: ADD TEMP[7].xyz, TEMP[7].zzzz, -CONST[10] 45: TEX TEMP[8], IN[0], SAMP[2], 2D 46: DP3 TEMP[1].w, TEMP[8], IMM[1] 47: LRP TEMP[9].xyz, CONST[12].yyyy, TEMP[1].wwww, TEMP[8] 48: DP3_SAT TEMP[1].x, TEMP[3], TEMP[1] 49: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].wwww 50: POW TEMP[2].w, |TEMP[1].xxxx|, CONST[12].zzzz 51: CMP TEMP[1].x, TEMP[1].yyyy, IMM[1].wwww, TEMP[2].wwww 52: MUL TEMP[1].yzw, TEMP[1].xxxx, CONST[11].xxyz 53: TEX TEMP[3], IN[0], SAMP[3], 2D 54: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].yyyy 55: MAD TEMP[1].yzw, TEMP[9].xxyz, TEMP[1], -TEMP[8].xxyz 56: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[1].yzww, TEMP[8] 57: DP3 TEMP[1].w, TEMP[1], IMM[1] 58: LRP TEMP[3].xyz, CONST[12].wwww, TEMP[1].wwww, TEMP[1] 59: MUL TEMP[1].xyz, TEMP[3], CONST[13].xxxx 60: MUL TEMP[1].xyz, TEMP[7], TEMP[1] 61: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 62: TEX TEMP[3], IN[0], SAMP[4], 2D 63: MUL TEMP[3].xyz, TEMP[3], CONST[13].yyyy 64: MUL TEMP[3].xyz, TEMP[3], CONST[13].xxxx 65: MAD TEMP[3].xyz, TEMP[3], CONST[5].wwww, CONST[5] 66: DP3_SAT TEMP[1].w, TEMP[4], TEMP[2] 67: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].wwww 68: CMP TEMP[1].w, TEMP[2].wwww, IMM[1].wwww, TEMP[1].wwww 69: DP3_SAT TEMP[2].x, TEMP[5], TEMP[2] 70: ADD TEMP[2].y, TEMP[2].xxxx, IMM[0].wwww 71: POW TEMP[3].w, |TEMP[2].xxxx|, IMM[2].xxxx 72: MUL TEMP[2].x, TEMP[3].wwww, IMM[2].yyyy 73: MUL TEMP[2].xzw, TEMP[3].xyyz, TEMP[2].xxxx 74: CMP TEMP[2].xyz, TEMP[2].yyyy, IMM[1].wwww, TEMP[2].xzww 75: MAD TEMP[1].xyz, TEMP[1], TEMP[1].wwww, TEMP[2] 76: MUL TEMP[1].xyz, TEMP[6], TEMP[1] 77: MUL OUT[0].xyz, TEMP[1], CONST[14] 78: MOV OUT[0].w, IMM[1].wwww 79: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %50 = bitcast <8 x i32> addrspace(2)* %49 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %53 = bitcast <4 x i32> addrspace(2)* %52 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %85 = fmul float %78, %78 %86 = fmul float %79, %79 %87 = fadd float %86, %85 %88 = fmul float %80, %80 %89 = fadd float %87, %88 %90 = call float @llvm.AMDGPU.rsq.clamped.f32(float %89) %91 = call float @llvm.minnum.f32(float %90, float 0x47EFFFFFE0000000) %92 = fmul float %78, %91 %93 = fmul float %79, %91 %94 = fmul float %80, %91 %95 = fmul float %75, %75 %96 = fmul float %76, %76 %97 = fadd float %96, %95 %98 = fmul float %77, %77 %99 = fadd float %97, %98 %100 = call float @llvm.AMDGPU.rsq.clamped.f32(float %99) %101 = call float @llvm.minnum.f32(float %100, float 0x47EFFFFFE0000000) %102 = fmul float %75, %101 %103 = fmul float %76, %101 %104 = fmul float %77, %101 %105 = bitcast float %73 to i32 %106 = bitcast float %74 to i32 %107 = insertelement <2 x i32> undef, i32 %105, i32 0 %108 = insertelement <2 x i32> %107, i32 %106, i32 1 %109 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %108, <32 x i8> %51, <16 x i8> %54, i32 2) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = fmul float %110, 2.000000e+00 %114 = fadd float %113, -1.000000e+00 %115 = fmul float %111, 2.000000e+00 %116 = fadd float %115, -1.000000e+00 %117 = fmul float %112, 2.000000e+00 %118 = fadd float %117, -1.000000e+00 %119 = fmul float %114, %114 %120 = fmul float %116, %116 %121 = fadd float %120, %119 %122 = fmul float %118, %118 %123 = fadd float %121, %122 %124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123) %125 = call float @llvm.minnum.f32(float %124, float 0x47EFFFFFE0000000) %126 = fmul float %114, %125 %127 = fmul float %116, %125 %128 = fmul float %118, %125 %129 = fmul float %126, %92 %130 = fmul float %127, %93 %131 = fadd float %130, %129 %132 = fmul float %128, %94 %133 = fadd float %131, %132 %134 = fmul float %133, %126 %135 = fmul float %133, %127 %136 = fmul float %133, %128 %137 = fmul float %134, 2.000000e+00 %138 = fsub float %137, %92 %139 = fmul float %135, 2.000000e+00 %140 = fsub float %139, %93 %141 = fmul float %136, 2.000000e+00 %142 = fsub float %141, %94 %143 = bitcast float %48 to i32 %144 = icmp eq i32 %143, 0 br i1 %144, label %ENDIF, label %IF IF: ; preds = %main_body %145 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %146 = load <16 x i8>, <16 x i8> addrspace(2)* %145, align 16, !tbaa !0 %147 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %148 = load <32 x i8>, <32 x i8> addrspace(2)* %147, align 32, !tbaa !0 %149 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %150 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %151 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %152 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %153 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %154 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %155 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %156 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %157 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %158 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %159 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %160 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %161 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %162 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %163 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %164 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %165 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %166 = fmul float %158, %82 %167 = fmul float %157, %82 %168 = fmul float %156, %82 %169 = fmul float %161, %81 %170 = fadd float %169, %166 %171 = fmul float %160, %81 %172 = fadd float %171, %167 %173 = fmul float %159, %81 %174 = fadd float %173, %168 %175 = fmul float %155, %83 %176 = fadd float %175, %170 %177 = fmul float %154, %83 %178 = fadd float %177, %172 %179 = fmul float %153, %83 %180 = fadd float %179, %174 %181 = fmul float %152, %84 %182 = fadd float %181, %176 %183 = fmul float %151, %84 %184 = fadd float %183, %178 %185 = fmul float %150, %84 %186 = fadd float %185, %180 %187 = fdiv float 1.000000e+00, %186 %188 = fmul float %187, %182 %189 = fmul float %187, %184 %190 = fmul float %188, %165 %191 = fadd float %190, %162 %192 = fmul float %189, %164 %193 = fadd float %192, %163 %194 = bitcast float %191 to i32 %195 = bitcast float %193 to i32 %196 = insertelement <2 x i32> undef, i32 %194, i32 0 %197 = insertelement <2 x i32> %196, i32 %195, i32 1 %198 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %197, <32 x i8> %148, <16 x i8> %146, i32 2) %199 = extractelement <4 x float> %198, i32 0 %200 = extractelement <4 x float> %198, i32 1 %201 = extractelement <4 x float> %198, i32 2 %202 = bitcast float %149 to i32 %203 = icmp eq i32 %202, 0 br i1 %203, label %ENDIF40, label %IF41 ENDIF: ; preds = %main_body, %ENDIF40 %temp24.0 = phi float [ %356, %ENDIF40 ], [ 1.000000e+00, %main_body ] %temp25.0 = phi float [ %357, %ENDIF40 ], [ 1.000000e+00, %main_body ] %temp26.0 = phi float [ %358, %ENDIF40 ], [ 1.000000e+00, %main_body ] %204 = fsub float 1.000000e+00, %34 %205 = fsub float 1.000000e+00, %35 %206 = fsub float 1.000000e+00, %36 %207 = bitcast float %73 to i32 %208 = bitcast float %74 to i32 %209 = insertelement <2 x i32> undef, i32 %207, i32 0 %210 = insertelement <2 x i32> %209, i32 %208, i32 1 %211 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %210, <32 x i8> %57, <16 x i8> %60, i32 2) %212 = extractelement <4 x float> %211, i32 0 %213 = extractelement <4 x float> %211, i32 1 %214 = extractelement <4 x float> %211, i32 2 %215 = fmul float %212, 0x3FD3333340000000 %216 = fmul float %213, 0x3FE2E147A0000000 %217 = fadd float %216, %215 %218 = fmul float %214, 0x3FBC28F5C0000000 %219 = fadd float %217, %218 %220 = call float @llvm.AMDGPU.lrp(float %40, float %219, float %212) %221 = call float @llvm.AMDGPU.lrp(float %40, float %219, float %213) %222 = call float @llvm.AMDGPU.lrp(float %40, float %219, float %214) %223 = fmul float %114, %92 %224 = fmul float %116, %93 %225 = fadd float %224, %223 %226 = fmul float %118, %94 %227 = fadd float %225, %226 %228 = call float @llvm.AMDIL.clamp.(float %227, float 0.000000e+00, float 1.000000e+00) %229 = fadd float %228, 0xBEB0C6F7A0000000 %230 = call float @fabs(float %228) %231 = call float @llvm.pow.f32(float %230, float %41) %232 = call float @llvm.AMDGPU.cndlt(float %229, float 0.000000e+00, float %231) %233 = fmul float %232, %37 %234 = fmul float %232, %38 %235 = fmul float %232, %39 %236 = bitcast float %73 to i32 %237 = bitcast float %74 to i32 %238 = insertelement <2 x i32> undef, i32 %236, i32 0 %239 = insertelement <2 x i32> %238, i32 %237, i32 1 %240 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %239, <32 x i8> %63, <16 x i8> %66, i32 2) %241 = extractelement <4 x float> %240, i32 1 %242 = fmul float %232, %241 %243 = fmul float %220, %233 %244 = fsub float %243, %212 %245 = fmul float %221, %234 %246 = fsub float %245, %213 %247 = fmul float %222, %235 %248 = fsub float %247, %214 %249 = fmul float %242, %244 %250 = fadd float %249, %212 %251 = fmul float %242, %246 %252 = fadd float %251, %213 %253 = fmul float %242, %248 %254 = fadd float %253, %214 %255 = fmul float %250, 0x3FD3333340000000 %256 = fmul float %252, 0x3FE2E147A0000000 %257 = fadd float %256, %255 %258 = fmul float %254, 0x3FBC28F5C0000000 %259 = fadd float %257, %258 %260 = call float @llvm.AMDGPU.lrp(float %42, float %259, float %250) %261 = call float @llvm.AMDGPU.lrp(float %42, float %259, float %252) %262 = call float @llvm.AMDGPU.lrp(float %42, float %259, float %254) %263 = fmul float %260, %43 %264 = fmul float %261, %43 %265 = fmul float %262, %43 %266 = fmul float %204, %263 %267 = fmul float %205, %264 %268 = fmul float %206, %265 %269 = fmul float %266, %29 %270 = fadd float %269, %26 %271 = fmul float %267, %29 %272 = fadd float %271, %27 %273 = fmul float %268, %29 %274 = fadd float %273, %28 %275 = bitcast float %73 to i32 %276 = bitcast float %74 to i32 %277 = insertelement <2 x i32> undef, i32 %275, i32 0 %278 = insertelement <2 x i32> %277, i32 %276, i32 1 %279 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %278, <32 x i8> %69, <16 x i8> %72, i32 2) %280 = extractelement <4 x float> %279, i32 0 %281 = extractelement <4 x float> %279, i32 1 %282 = extractelement <4 x float> %279, i32 2 %283 = fmul float %280, %44 %284 = fmul float %281, %44 %285 = fmul float %282, %44 %286 = fmul float %283, %43 %287 = fmul float %284, %43 %288 = fmul float %285, %43 %289 = fmul float %286, %33 %290 = fadd float %289, %30 %291 = fmul float %287, %33 %292 = fadd float %291, %31 %293 = fmul float %288, %33 %294 = fadd float %293, %32 %295 = fmul float %126, %102 %296 = fmul float %127, %103 %297 = fadd float %296, %295 %298 = fmul float %128, %104 %299 = fadd float %297, %298 %300 = call float @llvm.AMDIL.clamp.(float %299, float 0.000000e+00, float 1.000000e+00) %301 = fadd float %300, 0xBEB0C6F7A0000000 %302 = call float @llvm.AMDGPU.cndlt(float %301, float 0.000000e+00, float %300) %303 = fmul float %138, %102 %304 = fmul float %140, %103 %305 = fadd float %304, %303 %306 = fmul float %142, %104 %307 = fadd float %305, %306 %308 = call float @llvm.AMDIL.clamp.(float %307, float 0.000000e+00, float 1.000000e+00) %309 = fadd float %308, 0xBEB0C6F7A0000000 %310 = call float @fabs(float %308) %311 = call float @llvm.pow.f32(float %310, float 1.500000e+01) %312 = fmul float %311, 0x3FED48D5A0000000 %313 = fmul float %290, %312 %314 = fmul float %292, %312 %315 = fmul float %294, %312 %316 = call float @llvm.AMDGPU.cndlt(float %309, float 0.000000e+00, float %313) %317 = call float @llvm.AMDGPU.cndlt(float %309, float 0.000000e+00, float %314) %318 = call float @llvm.AMDGPU.cndlt(float %309, float 0.000000e+00, float %315) %319 = fmul float %270, %302 %320 = fadd float %319, %316 %321 = fmul float %272, %302 %322 = fadd float %321, %317 %323 = fmul float %274, %302 %324 = fadd float %323, %318 %325 = fmul float %temp24.0, %320 %326 = fmul float %temp25.0, %322 %327 = fmul float %temp26.0, %324 %328 = fmul float %325, %45 %329 = fmul float %326, %46 %330 = fmul float %327, %47 %331 = call i32 @llvm.SI.packf16(float %328, float %329) %332 = bitcast i32 %331 to float %333 = call i32 @llvm.SI.packf16(float %330, float 0.000000e+00) %334 = bitcast i32 %333 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %332, float %334, float %332, float %334) ret void IF41: ; preds = %IF %335 = extractelement <4 x float> %198, i32 3 %336 = fmul float %81, %81 %337 = fmul float %82, %82 %338 = fadd float %337, %336 %339 = fmul float %83, %83 %340 = fadd float %338, %339 %341 = call float @fabs(float %340) %342 = call float @llvm.AMDGPU.rsq.clamped.f32(float %341) %343 = call float @llvm.minnum.f32(float %342, float 0x47EFFFFFE0000000) %344 = fdiv float 1.000000e+00, %343 %345 = fsub float %24, %344 %346 = fmul float %345, %25 %347 = call float @llvm.AMDIL.clamp.(float %346, float 0.000000e+00, float 1.000000e+00) %348 = fmul float %347, %347 %349 = fadd float %335, -1.000000e+00 %350 = fmul float %348, %349 %351 = fadd float %350, 1.000000e+00 %352 = fmul float %348, %349 %353 = fadd float %352, 1.000000e+00 %354 = fmul float %348, %349 %355 = fadd float %354, 1.000000e+00 br label %ENDIF40 ENDIF40: ; preds = %IF, %IF41 %temp28.0 = phi float [ %351, %IF41 ], [ 1.000000e+00, %IF ] %temp29.0 = phi float [ %353, %IF41 ], [ 1.000000e+00, %IF ] %temp30.0 = phi float [ %355, %IF41 ], [ 1.000000e+00, %IF ] %356 = fmul float %199, %temp28.0 %357 = fmul float %200, %temp29.0 %358 = fmul float %201, %temp30.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_movk_i32 s8, 0xf00 ; B0080F00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s10, s[0:3], s8 ; C2050008 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800700 00640D03 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v11, 2.0, v13, -1.0 ; D282000B 03CE1AF4 v_mad_f32 v12, 2.0, v14, -1.0 ; D282000C 03CE1CF4 v_mad_f32 v13, 2.0, v15, -1.0 ; D282000D 03CE1EF4 v_mul_f32_e32 v14, v10, v10 ; 101C150A v_mad_f32 v14, v9, v9, v14 ; D282000E 043A1309 v_mad_f32 v14, v8, v8, v14 ; D282000E 043A1108 v_rsq_clamp_f32_e32 v17, v14 ; 7E22590E v_mul_f32_e32 v14, v5, v5 ; 101C0B05 v_mad_f32 v14, v6, v6, v14 ; D282000E 043A0D06 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 v_mad_f32 v14, v7, v7, v14 ; D282000E 043A0F07 v_rsq_clamp_f32_e32 v16, v14 ; 7E20590E v_mul_f32_e32 v14, v11, v11 ; 101C170B v_mad_f32 v14, v12, v12, v14 ; D282000E 043A190C v_mad_f32 v14, v13, v13, v14 ; D282000E 043A1B0D v_rsq_clamp_f32_e32 v18, v14 ; 7E24590E v_cmp_ne_i32_e64 s[10:11], 0, s10 ; D10A000A 00001480 v_mov_b32_e32 v14, 1.0 ; 7E1C02F2 v_mov_b32_e32 v15, 1.0 ; 7E1E02F2 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_cbranch_execz BB0_4 ; BF880000 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 v_interp_p1_f32 v14, v0, 1, 3, [m0] ; C8380D00 v_interp_p2_f32 v14, [v14], v1, 1, 3, [m0] ; C8390D01 v_interp_p1_f32 v15, v0, 2, 3, [m0] ; C83C0E00 v_interp_p2_f32 v15, [v15], v1, 2, 3, [m0] ; C83D0E01 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_buffer_load_dword s13, s[0:3], 0x1b ; C206811B s_buffer_load_dword s14, s[0:3], 0x1c ; C207011C s_buffer_load_dword s15, s[0:3], 0x1d ; C207811D s_buffer_load_dword s16, s[0:3], 0x4 ; C2080104 s_buffer_load_dword s17, s[0:3], 0x5 ; C2088105 s_buffer_load_dword s18, s[0:3], 0x6 ; C2090106 s_buffer_load_dword s19, s[0:3], 0x7 ; C2098107 s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 s_buffer_load_dword s21, s[0:3], 0x27 ; C20A8127 s_buffer_load_dword s22, s[0:3], 0x1f ; C20B011F s_buffer_load_dword s23, s[0:3], 0x20 ; C20B8120 s_buffer_load_dword s24, s[0:3], 0x21 ; C20C0121 s_buffer_load_dword s25, s[0:3], 0x23 ; C20C8123 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s14, v14 ; 10021C0E v_mul_f32_e32 v19, s15, v14 ; 10261C0F v_mad_f32 v1, s20, v2, v1 ; D2820001 04060414 v_mad_f32 v19, s12, v2, v19 ; D2820013 044E040C s_buffer_load_dword s12, s[0:3], 0x24 ; C2060124 s_buffer_load_dword s14, s[0:3], 0x25 ; C2070125 v_mul_f32_e32 v20, s22, v14 ; 10281C16 v_mad_f32 v20, s13, v2, v20 ; D2820014 0452040D v_mad_f32 v1, s23, v15, v1 ; D2820001 04061E17 v_mad_f32 v20, s25, v15, v20 ; D2820014 04521E19 v_mad_f32 v20, s21, v0, v20 ; D2820014 04520015 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_movk_i32 s13, 0xf04 ; B00D0F04 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_buffer_load_dword s13, s[0:3], s13 ; C206800D v_rcp_f32_e32 v20, v20 ; 7E285514 v_mad_f32 v19, s24, v15, v19 ; D2820013 044E1E18 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s12, v0, v1 ; D2820001 0406000C v_mad_f32 v0, s14, v0, v19 ; D2820000 044E000E v_mul_f32_e32 v1, v1, v20 ; 10022901 v_mul_f32_e32 v0, v0, v20 ; 10002900 v_mov_b32_e32 v19, s19 ; 7E260213 v_mad_f32 v19, s16, v1, v19 ; D2820013 044E0210 v_mov_b32_e32 v1, s18 ; 7E020212 v_mad_f32 v20, s17, v0, v1 ; D2820014 04060011 image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[28:35], s[20:23] ; F0800F00 00A71313 v_cmp_ne_i32_e64 s[12:13], 0, s13 ; D10A000C 00001A80 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_buffer_load_dword s15, s[0:3], 0x1 ; C2078101 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v2, v2 ; 10000502 v_mad_f32 v0, v14, v14, v0 ; D2820000 04021D0E v_mad_f32 v0, v15, v15, v0 ; D2820000 04021F0F v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_add_f32_e32 v1, -1.0, v22 ; 06022CF3 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s14, v0 ; 0800000E v_mul_f32_e32 v0, s15, v0 ; 1000000F v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v0, v0, v1, 1.0 ; D2820000 03CA0300 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mul_f32_e32 v15, v0, v19 ; 101E2700 v_mul_f32_e32 v14, v0, v20 ; 101C2900 v_mul_f32_e32 v2, v0, v21 ; 10042B00 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mov_b32_e32 v1, s8 ; 7E020208 v_mov_b32_e32 v0, s9 ; 7E000209 v_min_f32_e32 v23, 0x7f7fffff, v17 ; 1E2E22FF 7F7FFFFF v_min_f32_e32 v17, 0x7f7fffff, v18 ; 1E2224FF 7F7FFFFF s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s12, s[0:3], 0x11 ; C2060111 v_mul_f32_e32 v19, v23, v10 ; 10261517 s_buffer_load_dword s13, s[0:3], 0x12 ; C2068112 s_buffer_load_dword s10, s[0:3], 0x14 ; C2050114 s_buffer_load_dword s14, s[0:3], 0x15 ; C2070115 s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116 s_buffer_load_dword s18, s[0:3], 0x28 ; C2090128 s_buffer_load_dword s17, s[0:3], 0x29 ; C2088129 s_buffer_load_dword s16, s[0:3], 0x2a ; C208012A s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C s_buffer_load_dword s22, s[0:3], 0x2d ; C20B012D s_buffer_load_dword s23, s[0:3], 0x2e ; C20B812E s_buffer_load_dword s26, s[0:3], 0x31 ; C20D0131 s_buffer_load_dword s25, s[0:3], 0x32 ; C20C8132 s_buffer_load_dword s20, s[0:3], 0x33 ; C20A0133 s_buffer_load_dword s19, s[0:3], 0x34 ; C2098134 s_buffer_load_dword s21, s[0:3], 0x35 ; C20A8135 s_buffer_load_dword s8, s[0:3], 0x38 ; C2040138 s_buffer_load_dword s9, s[0:3], 0x39 ; C2048139 s_buffer_load_dword s0, s[0:3], 0x3a ; C200013A v_mul_f32_e32 v20, v17, v11 ; 10281711 v_mul_f32_e32 v18, v17, v12 ; 10241911 v_mul_f32_e32 v17, v17, v13 ; 10221B11 v_mul_f32_e32 v21, v23, v9 ; 102A1317 v_mul_f32_e32 v22, v19, v20 ; 102C2913 v_mad_f32 v24, v18, v21, v22 ; D2820018 045A2B12 v_mul_f32_e32 v22, v23, v8 ; 102C1117 v_mad_f32 v24, v17, v22, v24 ; D2820018 04622D11 v_mul_f32_e32 v25, v20, v24 ; 10323114 v_mad_f32 v25, v24, v20, v25 ; D2820019 04662918 v_mad_f32 v10, -v10, v23, v25 ; D282000A 24662F0A v_mul_f32_e32 v25, v18, v24 ; 10323112 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510 s_load_dwordx8 s[56:63], s[6:7], 0x10 ; C0DC0710 s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718 s_load_dwordx8 s[48:55], s[6:7], 0x20 ; C0D80720 v_mad_f32 v25, v24, v18, v25 ; D2820019 04662518 v_mad_f32 v9, -v9, v23, v25 ; D2820009 24662F09 v_mul_f32_e32 v25, v17, v24 ; 10323111 v_mad_f32 v24, v24, v17, v25 ; D2820018 04662318 v_mad_f32 v8, -v8, v23, v24 ; D2820008 24622F08 v_min_f32_e32 v23, 0x7f7fffff, v16 ; 1E2E20FF 7F7FFFFF v_mul_f32_e32 v16, v23, v5 ; 10200B17 v_mul_f32_e32 v6, v23, v6 ; 100C0D17 v_mul_f32_e32 v5, v23, v7 ; 100A0F17 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[56:63], s[36:39] ; F0800700 012E1703 v_mul_f32_e32 v7, v19, v11 ; 100E1713 v_mad_f32 v7, v12, v21, v7 ; D2820007 041E2B0C v_mad_f32 v7, v13, v22, v7 ; D2820007 041E2D0D image_sample v11, 2, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[40:47], s[28:31] ; F0800200 00EA0B03 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[48:55], s[32:35] ; F0800700 010C1A03 v_add_f32_e64 v3, 0, v7 clamp ; D2060803 00020E80 v_mul_f32_e32 v4, v16, v20 ; 10082910 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v7, 0x3e99999a, v23 ; 100E2EFF 3E99999A v_mov_b32_e32 v12, 0x3f170a3d ; 7E1802FF 3F170A3D v_mad_f32 v7, v24, v12, v7 ; D2820007 041E1918 v_mov_b32_e32 v13, 0x3de147ae ; 7E1A02FF 3DE147AE v_mad_f32 v7, v25, v13, v7 ; D2820007 041E1B19 v_sub_f32_e64 v19, 1.0, s26 ; D2080013 000034F2 v_mul_f32_e32 v20, v23, v19 ; 10282717 v_mad_f32 v20, s26, v7, v20 ; D2820014 04520E1A v_mul_f32_e32 v21, v24, v19 ; 102A2718 v_mad_f32 v21, s26, v7, v21 ; D2820015 04560E1A v_mul_f32_e32 v19, v25, v19 ; 10262719 v_mad_f32 v7, s26, v7, v19 ; D2820007 044E0E1A v_mov_b32_e32 v19, 0x7fffffff ; 7E2602FF 7FFFFFFF v_and_b32_e32 v22, v3, v19 ; 362C2703 v_log_f32_e32 v22, v22 ; 7E2C4F16 v_mov_b32_e32 v29, 0xb58637bd ; 7E3A02FF B58637BD v_add_f32_e32 v3, v29, v3 ; 0606071D v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_mul_legacy_f32_e32 v3, s25, v22 ; 0E062C19 v_exp_f32_e32 v3, v3 ; 7E064B03 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_mul_f32_e32 v22, s24, v3 ; 102C0618 v_mad_f32 v20, v20, v22, -v23 ; D2820014 845E2D14 v_mul_f32_e32 v22, s22, v3 ; 102C0616 v_mad_f32 v21, v21, v22, -v24 ; D2820015 84622D15 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v11, v11, v3 ; 1016070B v_mul_f32_e32 v3, s23, v3 ; 10060617 v_mad_f32 v3, v7, v3, -v25 ; D2820003 84660707 v_mad_f32 v7, v11, v20, v23 ; D2820007 045E290B v_mad_f32 v20, v11, v21, v24 ; D2820014 04622B0B v_mad_f32 v3, v11, v3, v25 ; D2820003 0466070B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, s21, v26 ; 10163415 v_mul_f32_e32 v21, s21, v27 ; 102A3615 v_mul_f32_e32 v22, s21, v28 ; 102C3815 v_mul_f32_e32 v23, 0x3e99999a, v7 ; 102E0EFF 3E99999A v_mad_f32 v12, v20, v12, v23 ; D282000C 045E1914 v_mad_f32 v12, v3, v13, v12 ; D282000C 04321B03 v_sub_f32_e64 v13, 1.0, s20 ; D208000D 000028F2 v_mul_f32_e32 v7, v7, v13 ; 100E1B07 v_mul_f32_e32 v20, v20, v13 ; 10281B14 v_mul_f32_e32 v3, v3, v13 ; 10061B03 v_mad_f32 v7, s20, v12, v7 ; D2820007 041E1814 v_mad_f32 v13, s20, v12, v20 ; D282000D 04521814 v_mad_f32 v3, s20, v12, v3 ; D2820003 040E1814 v_mul_f32_e32 v7, s19, v7 ; 100E0E13 v_sub_f32_e64 v12, 1.0, s18 ; D208000C 000024F2 v_mul_f32_e32 v7, v7, v12 ; 100E1907 v_mul_f32_e32 v12, s19, v13 ; 10181A13 v_sub_f32_e64 v13, 1.0, s17 ; D208000D 000022F2 v_mul_f32_e32 v12, v12, v13 ; 10181B0C v_mul_f32_e32 v3, s19, v3 ; 10060613 v_sub_f32_e64 v13, 1.0, s16 ; D208000D 000020F2 v_mul_f32_e32 v3, v3, v13 ; 10061B03 v_mad_f32 v7, v1, v7, s11 ; D2820007 002E0F01 v_mad_f32 v12, v12, v1, s12 ; D282000C 0032030C v_mad_f32 v1, v3, v1, s13 ; D2820001 00360303 v_mul_f32_e32 v3, s19, v11 ; 10061613 v_mul_f32_e32 v11, s19, v21 ; 10162A13 v_mul_f32_e32 v13, s19, v22 ; 101A2C13 v_mad_f32 v3, v0, v3, s10 ; D2820003 002A0700 v_mad_f32 v11, v11, v0, s14 ; D282000B 003A010B v_mad_f32 v0, v13, v0, s15 ; D2820000 003E010D v_mad_f32 v4, v18, v6, v4 ; D2820004 04120D12 v_mul_f32_e32 v10, v16, v10 ; 10141510 v_mad_f32 v6, v9, v6, v10 ; D2820006 042A0D09 v_mad_f32 v4, v17, v5, v4 ; D2820004 04120B11 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mad_f32 v5, v8, v5, v6 ; D2820005 041A0B08 v_add_f32_e32 v6, v29, v4 ; 060C091D v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_and_b32_e32 v8, v5, v19 ; 36102705 v_log_f32_e32 v8, v8 ; 7E104F08 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v4, v4, 0, vcc ; D2000004 01A90104 v_add_f32_e32 v5, v29, v5 ; 060A0B1D v_mul_legacy_f32_e32 v6, 0x41700000, v8 ; 0E0C10FF 41700000 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v6, 0x3f6a46ad, v6 ; 100C0CFF 3F6A46AD v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v8, v6, v11 ; 10101706 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_cndmask_b32_e64 v5, v8, 0, vcc ; D2000005 01A90108 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mad_f32 v3, v7, v4, v3 ; D2820003 040E0907 v_mad_f32 v5, v12, v4, v5 ; D2820005 0416090C v_mad_f32 v0, v1, v4, v0 ; D2820000 04020901 v_mul_f32_e32 v1, v3, v15 ; 10021F03 v_mul_f32_e32 v3, v5, v14 ; 10061D05 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mul_f32_e32 v1, s8, v1 ; 10020208 v_mul_f32_e32 v2, s9, v3 ; 10040609 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 32 Code Size: 1556 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[5], PERSPECTIVE DCL IN[4], TEXCOORD[6], PERSPECTIVE DCL IN[5], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..7] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { -0.3333, 8.0000, 0.0398, 0.0000} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[4], IN[4] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[4], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[2], IN[2] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[2], TEMP[0].xxxx 8: TEX TEMP[3], IN[1], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: TEX TEMP[3], IN[0], SAMP[5], 2D 18: ADD TEMP[1].w, TEMP[3].xxxx, CONST[8].xxxx 19: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[8].yyyy 20: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 21: POW TEMP[3].x, |TEMP[1].wwww|, CONST[8].zzzz 22: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].xxxx 23: DP3 TEMP[1].w, IN[3], IN[3] 24: RSQ TEMP[0], |TEMP[1].wwww| 25: MIN TEMP[2].w, IMM[2].xxxx, TEMP[0] 26: MUL TEMP[5].xyz, TEMP[2].wwww, IN[3] 27: DP3 TEMP[2].w, TEMP[5], -CONST[9] 28: ADD TEMP[2].w, TEMP[2].wwww, -CONST[10].xxxx 29: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[10].yyyy 30: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 31: ADD TEMP[1].w, -TEMP[1].wwww, -IMM[0].yyyy 32: MUL TEMP[3].w, TEMP[3].zzzz, TEMP[1].wwww 33: MUL TEMP[5], TEMP[2].wwww, TEMP[3].wwww 34: MOV TEMP[6], TEMP[5] 35: KILL_IF TEMP[6] 36: TEX TEMP[6], IN[1], SAMP[4], 2D 37: MOV TEMP[5].xy, CONST[6] 38: MAD TEMP[3].w, TEMP[6].xxxx, TEMP[5].yyyy, IMM[1].xxxx 39: CMP TEMP[6], -TEMP[5].wwww, TEMP[3].wwww, IMM[0].wwww 40: KILL_IF TEMP[6] 41: UIF CONST[240].xxxx :47 42: RCP TEMP[3].w, IN[5].wwww 43: MUL TEMP[5].yz, TEMP[3].wwww, IN[5].xxyw 44: MAD TEMP[5].yz, TEMP[5], CONST[1].xxyw, CONST[1].xwzw 45: TEX TEMP[6], TEMP[5].yzzw, SAMP[0], 2D 46: MUL TEMP[3].xyz, TEMP[3].zzzz, TEMP[6] 47: ENDIF 48: MOV TEMP[5].y, IMM[0].yyyy 49: ADD TEMP[6].xyz, -TEMP[5].yyyy, -CONST[0] 50: TEX TEMP[7], IN[1], SAMP[2], 2D 51: MUL TEMP[6].xyz, TEMP[6], TEMP[7] 52: MAD TEMP[6].xyz, TEMP[6], CONST[4].wwww, CONST[4] 53: MAX TEMP[3].w, TEMP[1].wwww, IMM[0].wwww 54: ADD TEMP[1].w, TEMP[3].wwww, IMM[0].zzzz 55: POW TEMP[4].w, |TEMP[3].wwww|, CONST[7].wwww 56: TEX TEMP[7], IN[1], SAMP[3], 2D 57: MAD TEMP[7].xyz, TEMP[7], CONST[5].wwww, CONST[5] 58: DP3_SAT TEMP[3].w, TEMP[4], TEMP[2] 59: ADD TEMP[4].x, TEMP[3].wwww, IMM[0].zzzz 60: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 61: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].zzzz 62: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[6].xxxx 63: ADD TEMP[1].x, TEMP[5].xxxx, IMM[1].yyyy 64: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 65: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 66: MUL TEMP[2].xyz, TEMP[3].wwww, TEMP[6] 67: CMP TEMP[2].xyz, TEMP[4].xxxx, IMM[0].wwww, TEMP[2] 68: MUL TEMP[4].xyz, TEMP[7], TEMP[1].xxxx 69: CMP TEMP[1], TEMP[1].yyyw, IMM[0].wwww, TEMP[4] 70: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 71: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 72: MUL TEMP[1].xyz, TEMP[3], TEMP[1] 73: MUL TEMP[1].xyz, TEMP[1], CONST[7] 74: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 75: CMP OUT[0].xyz, -TEMP[5].wwww, TEMP[1], IMM[0].wwww 76: MOV OUT[0].w, IMM[0].wwww 77: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %50 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %55 = bitcast <8 x i32> addrspace(2)* %54 to <32 x i8> addrspace(2)* %56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %58 = bitcast <4 x i32> addrspace(2)* %57 to <16 x i8> addrspace(2)* %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %61 = bitcast <8 x i32> addrspace(2)* %60 to <32 x i8> addrspace(2)* %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %64 = bitcast <4 x i32> addrspace(2)* %63 to <16 x i8> addrspace(2)* %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 %66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %67 = bitcast <8 x i32> addrspace(2)* %66 to <32 x i8> addrspace(2)* %68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %70 = bitcast <4 x i32> addrspace(2)* %69 to <16 x i8> addrspace(2)* %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %73 = bitcast <8 x i32> addrspace(2)* %72 to <32 x i8> addrspace(2)* %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, align 32, !tbaa !0 %75 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %76 = bitcast <4 x i32> addrspace(2)* %75 to <16 x i8> addrspace(2)* %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %79 = bitcast <8 x i32> addrspace(2)* %78 to <32 x i8> addrspace(2)* %80 = load <32 x i8>, <32 x i8> addrspace(2)* %79, align 32, !tbaa !0 %81 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %82 = bitcast <4 x i32> addrspace(2)* %81 to <16 x i8> addrspace(2)* %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %97 = fmul float %94, %94 %98 = fmul float %95, %95 %99 = fadd float %98, %97 %100 = fmul float %96, %96 %101 = fadd float %99, %100 %102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101) %103 = call float @llvm.minnum.f32(float %102, float 0x47EFFFFFE0000000) %104 = fmul float %94, %103 %105 = fmul float %95, %103 %106 = fmul float %96, %103 %107 = fmul float %88, %88 %108 = fmul float %89, %89 %109 = fadd float %108, %107 %110 = fmul float %90, %90 %111 = fadd float %109, %110 %112 = call float @llvm.AMDGPU.rsq.clamped.f32(float %111) %113 = call float @llvm.minnum.f32(float %112, float 0x47EFFFFFE0000000) %114 = fmul float %88, %113 %115 = fmul float %89, %113 %116 = fmul float %90, %113 %117 = bitcast float %86 to i32 %118 = bitcast float %87 to i32 %119 = insertelement <2 x i32> undef, i32 %117, i32 0 %120 = insertelement <2 x i32> %119, i32 %118, i32 1 %121 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %120, <32 x i8> %56, <16 x i8> %59, i32 2) %122 = extractelement <4 x float> %121, i32 0 %123 = extractelement <4 x float> %121, i32 1 %124 = extractelement <4 x float> %121, i32 2 %125 = fmul float %122, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %123, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %124, 2.000000e+00 %130 = fadd float %129, -1.000000e+00 %131 = fmul float %126, %126 %132 = fmul float %128, %128 %133 = fadd float %132, %131 %134 = fmul float %130, %130 %135 = fadd float %133, %134 %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135) %137 = call float @llvm.minnum.f32(float %136, float 0x47EFFFFFE0000000) %138 = fmul float %126, %137 %139 = fmul float %128, %137 %140 = fmul float %130, %137 %141 = fmul float %138, %104 %142 = fmul float %139, %105 %143 = fadd float %142, %141 %144 = fmul float %140, %106 %145 = fadd float %143, %144 %146 = fmul float %145, %138 %147 = fmul float %145, %139 %148 = fmul float %145, %140 %149 = fmul float %146, 2.000000e+00 %150 = fsub float %149, %104 %151 = fmul float %147, 2.000000e+00 %152 = fsub float %151, %105 %153 = fmul float %148, 2.000000e+00 %154 = fsub float %153, %106 %155 = bitcast float %84 to i32 %156 = bitcast float %85 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %80, <16 x i8> %83, i32 2) %160 = extractelement <4 x float> %159, i32 0 %161 = fadd float %160, %41 %162 = fmul float %161, %42 %163 = call float @llvm.AMDIL.clamp.(float %162, float 0.000000e+00, float 1.000000e+00) %164 = fadd float %163, 0xBEB0C6F7A0000000 %165 = call float @fabs(float %163) %166 = call float @llvm.pow.f32(float %165, float %43) %167 = call float @llvm.AMDGPU.cndlt(float %164, float 0.000000e+00, float %166) %168 = call float @llvm.AMDGPU.cndlt(float %164, float 0.000000e+00, float %166) %169 = call float @llvm.AMDGPU.cndlt(float %164, float 0.000000e+00, float %166) %170 = fmul float %91, %91 %171 = fmul float %92, %92 %172 = fadd float %171, %170 %173 = fmul float %93, %93 %174 = fadd float %172, %173 %175 = call float @fabs(float %174) %176 = call float @llvm.AMDGPU.rsq.clamped.f32(float %175) %177 = call float @llvm.minnum.f32(float %176, float 0x47EFFFFFE0000000) %178 = fmul float %177, %91 %179 = fmul float %177, %92 %180 = fmul float %177, %93 %181 = fmul float %44, %178 %182 = fsub float -0.000000e+00, %181 %183 = fmul float %45, %179 %184 = fsub float %182, %183 %185 = fmul float %46, %180 %186 = fsub float %184, %185 %187 = fsub float %186, %47 %188 = fmul float %187, %48 %189 = call float @llvm.AMDIL.clamp.(float %188, float 0.000000e+00, float 1.000000e+00) %190 = fmul float %189, %189 %191 = fsub float 1.000000e+00, %174 %192 = fmul float %169, %191 %193 = fmul float %190, %192 %194 = fmul float %190, %192 %195 = fmul float %190, %192 %196 = fmul float %190, %192 %197 = fcmp olt float %193, 0.000000e+00 %198 = fcmp olt float %194, 0.000000e+00 %199 = fcmp olt float %195, 0.000000e+00 %200 = fcmp olt float %196, 0.000000e+00 %201 = or i1 %200, %199 %202 = or i1 %201, %198 %203 = or i1 %202, %197 %204 = select i1 %203, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %204) %205 = bitcast float %86 to i32 %206 = bitcast float %87 to i32 %207 = insertelement <2 x i32> undef, i32 %205, i32 0 %208 = insertelement <2 x i32> %207, i32 %206, i32 1 %209 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %208, <32 x i8> %74, <16 x i8> %77, i32 2) %210 = extractelement <4 x float> %209, i32 0 %211 = fmul float %210, %36 %212 = fadd float %211, 0xBFD554C980000000 %213 = fsub float -0.000000e+00, %196 %214 = call float @llvm.AMDGPU.cndlt(float %213, float %212, float 0.000000e+00) %215 = fsub float -0.000000e+00, %196 %216 = call float @llvm.AMDGPU.cndlt(float %215, float %212, float 0.000000e+00) %217 = fsub float -0.000000e+00, %196 %218 = call float @llvm.AMDGPU.cndlt(float %217, float %212, float 0.000000e+00) %219 = fsub float -0.000000e+00, %196 %220 = call float @llvm.AMDGPU.cndlt(float %219, float %212, float 0.000000e+00) %221 = fcmp olt float %214, 0.000000e+00 %222 = fcmp olt float %216, 0.000000e+00 %223 = fcmp olt float %218, 0.000000e+00 %224 = fcmp olt float %220, 0.000000e+00 %225 = or i1 %224, %223 %226 = or i1 %225, %222 %227 = or i1 %226, %221 %228 = select i1 %227, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %228) %229 = bitcast float %49 to i32 %230 = icmp eq i32 %229, 0 br i1 %230, label %ENDIF, label %IF IF: ; preds = %main_body %231 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %232 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %233 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %234 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %235 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %236 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %237 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %238 = fdiv float 1.000000e+00, %231 %239 = fmul float %238, %233 %240 = fmul float %238, %232 %241 = fmul float %239, %237 %242 = fadd float %241, %234 %243 = fmul float %240, %236 %244 = fadd float %243, %235 %245 = bitcast float %242 to i32 %246 = bitcast float %244 to i32 %247 = insertelement <2 x i32> undef, i32 %245, i32 0 %248 = insertelement <2 x i32> %247, i32 %246, i32 1 %249 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %248, <32 x i8> %51, <16 x i8> %53, i32 2) %250 = extractelement <4 x float> %249, i32 0 %251 = extractelement <4 x float> %249, i32 1 %252 = extractelement <4 x float> %249, i32 2 %253 = fmul float %169, %250 %254 = fmul float %169, %251 %255 = fmul float %169, %252 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp12.0 = phi float [ %253, %IF ], [ %167, %main_body ] %temp13.0 = phi float [ %254, %IF ], [ %168, %main_body ] %temp14.0 = phi float [ %255, %IF ], [ %169, %main_body ] %256 = fsub float 1.000000e+00, %24 %257 = fsub float 1.000000e+00, %25 %258 = fsub float 1.000000e+00, %26 %259 = bitcast float %86 to i32 %260 = bitcast float %87 to i32 %261 = insertelement <2 x i32> undef, i32 %259, i32 0 %262 = insertelement <2 x i32> %261, i32 %260, i32 1 %263 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %262, <32 x i8> %62, <16 x i8> %65, i32 2) %264 = extractelement <4 x float> %263, i32 0 %265 = extractelement <4 x float> %263, i32 1 %266 = extractelement <4 x float> %263, i32 2 %267 = fmul float %256, %264 %268 = fmul float %257, %265 %269 = fmul float %258, %266 %270 = fmul float %267, %30 %271 = fadd float %270, %27 %272 = fmul float %268, %30 %273 = fadd float %272, %28 %274 = fmul float %269, %30 %275 = fadd float %274, %29 %276 = call float @llvm.maxnum.f32(float %191, float 0.000000e+00) %277 = fadd float %276, 0xBEB0C6F7A0000000 %278 = call float @fabs(float %276) %279 = call float @llvm.pow.f32(float %278, float %40) %280 = bitcast float %86 to i32 %281 = bitcast float %87 to i32 %282 = insertelement <2 x i32> undef, i32 %280, i32 0 %283 = insertelement <2 x i32> %282, i32 %281, i32 1 %284 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %283, <32 x i8> %68, <16 x i8> %71, i32 2) %285 = extractelement <4 x float> %284, i32 0 %286 = extractelement <4 x float> %284, i32 1 %287 = extractelement <4 x float> %284, i32 2 %288 = fmul float %285, %34 %289 = fadd float %288, %31 %290 = fmul float %286, %34 %291 = fadd float %290, %32 %292 = fmul float %287, %34 %293 = fadd float %292, %33 %294 = fmul float %138, %114 %295 = fmul float %139, %115 %296 = fadd float %295, %294 %297 = fmul float %140, %116 %298 = fadd float %296, %297 %299 = call float @llvm.AMDIL.clamp.(float %298, float 0.000000e+00, float 1.000000e+00) %300 = fadd float %299, 0xBEB0C6F7A0000000 %301 = fmul float %150, %114 %302 = fmul float %152, %115 %303 = fadd float %302, %301 %304 = fmul float %154, %116 %305 = fadd float %303, %304 %306 = call float @llvm.AMDIL.clamp.(float %305, float 0.000000e+00, float 1.000000e+00) %307 = fadd float %306, 0xBEB0C6F7A0000000 %308 = call float @fabs(float %306) %309 = call float @llvm.pow.f32(float %308, float %35) %310 = fadd float %35, 8.000000e+00 %311 = fmul float %310, %309 %312 = fmul float %311, 0x3FA45F3060000000 %313 = fmul float %299, %271 %314 = fmul float %299, %273 %315 = fmul float %299, %275 %316 = call float @llvm.AMDGPU.cndlt(float %300, float 0.000000e+00, float %313) %317 = call float @llvm.AMDGPU.cndlt(float %300, float 0.000000e+00, float %314) %318 = call float @llvm.AMDGPU.cndlt(float %300, float 0.000000e+00, float %315) %319 = fmul float %289, %312 %320 = fmul float %291, %312 %321 = fmul float %293, %312 %322 = call float @llvm.AMDGPU.cndlt(float %307, float 0.000000e+00, float %319) %323 = call float @llvm.AMDGPU.cndlt(float %307, float 0.000000e+00, float %320) %324 = call float @llvm.AMDGPU.cndlt(float %307, float 0.000000e+00, float %321) %325 = call float @llvm.AMDGPU.cndlt(float %277, float 0.000000e+00, float %279) %326 = fadd float %322, %316 %327 = fadd float %323, %317 %328 = fadd float %324, %318 %329 = fmul float %325, %326 %330 = fmul float %325, %327 %331 = fmul float %325, %328 %332 = fmul float %temp12.0, %329 %333 = fmul float %temp13.0, %330 %334 = fmul float %temp14.0, %331 %335 = fmul float %332, %37 %336 = fmul float %333, %38 %337 = fmul float %334, %39 %338 = fmul float %190, %335 %339 = fmul float %190, %336 %340 = fmul float %190, %337 %341 = fsub float -0.000000e+00, %196 %342 = call float @llvm.AMDGPU.cndlt(float %341, float %338, float 0.000000e+00) %343 = fsub float -0.000000e+00, %196 %344 = call float @llvm.AMDGPU.cndlt(float %343, float %339, float 0.000000e+00) %345 = fsub float -0.000000e+00, %196 %346 = call float @llvm.AMDGPU.cndlt(float %345, float %340, float 0.000000e+00) %347 = call i32 @llvm.SI.packf16(float %342, float %344) %348 = bitcast i32 %347 to float %349 = call i32 @llvm.SI.packf16(float %346, float 0.000000e+00) %350 = bitcast i32 %349 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %348, float %350, float %348, float %350) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v10, v0, 0, 0, [m0] ; C8280000 v_interp_p2_f32 v10, [v10], v1, 0, 0, [m0] ; C8290001 v_interp_p1_f32 v11, v0, 1, 0, [m0] ; C82C0100 v_interp_p2_f32 v11, [v11], v1, 1, 0, [m0] ; C82D0101 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v2, v0, 2, 2, [m0] ; C8080A00 v_interp_p2_f32 v2, [v2], v1, 2, 2, [m0] ; C8090A01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v17, v0, 2, 3, [m0] ; C8440E00 v_interp_p2_f32 v17, [v17], v1, 2, 3, [m0] ; C8450E01 v_interp_p1_f32 v5, v0, 0, 4, [m0] ; C8141000 v_interp_p2_f32 v5, [v5], v1, 0, 4, [m0] ; C8151001 v_interp_p1_f32 v4, v0, 1, 4, [m0] ; C8101100 v_interp_p2_f32 v4, [v4], v1, 1, 4, [m0] ; C8111101 v_interp_p1_f32 v3, v0, 2, 4, [m0] ; C80C1200 v_interp_p2_f32 v3, [v3], v1, 2, 4, [m0] ; C80D1201 s_load_dwordx4 s[24:27], s[4:5], 0x14 ; C08C0514 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 s_load_dwordx8 s[28:35], s[6:7], 0x28 ; C0CE0728 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s44, s[0:3], 0x20 ; C2160120 s_buffer_load_dword s45, s[0:3], 0x21 ; C2168121 s_load_dwordx8 s[8:15], s[6:7], 0x20 ; C0C40720 s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[36:43], s[20:23] ; F0800700 00A91206 image_sample v10, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[28:35], s[24:27] ; F0800100 00C70A0A s_buffer_load_dword s20, s[0:3], 0x22 ; C20A0122 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v10, s44, v10 ; 0614142C v_mul_f32_e32 v10, s45, v10 ; 1014142D v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_and_b32_e32 v11, 0x7fffffff, v10 ; 361614FF 7FFFFFFF v_log_f32_e32 v11, v11 ; 7E164F0B v_mad_f32 v14, 2.0, v18, -1.0 ; D282000E 03CE24F4 v_mad_f32 v15, 2.0, v19, -1.0 ; D282000F 03CE26F4 v_mad_f32 v16, 2.0, v20, -1.0 ; D2820010 03CE28F4 v_mov_b32_e32 v18, 0xb58637bd ; 7E2402FF B58637BD v_add_f32_e32 v10, v10, v18 ; 0614250A v_mul_legacy_f32_e32 v11, s20, v11 ; 0E161614 v_exp_f32_e32 v11, v11 ; 7E164B0B v_mul_f32_e32 v18, v12, v12 ; 1024190C v_mad_f32 v18, v13, v13, v18 ; D2820012 044A1B0D v_mad_f32 v18, v17, v17, v18 ; D2820012 044A2311 s_buffer_load_dword s20, s[0:3], 0x24 ; C20A0124 v_rsq_clamp_f32_e64 v19, |v18| ; D3580113 00000112 s_buffer_load_dword s21, s[0:3], 0x25 ; C20A8125 s_buffer_load_dword s22, s[0:3], 0x26 ; C20B0126 s_buffer_load_dword s23, s[0:3], 0x28 ; C20B8128 s_buffer_load_dword s24, s[0:3], 0x29 ; C20C0129 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v10, v11, 0, vcc ; D200000A 01A9010B v_min_f32_e32 v11, 0x7f7fffff, v19 ; 1E1626FF 7F7FFFFF v_mul_f32_e32 v12, v12, v11 ; 1018170C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s20, v12 ; 10181814 v_mul_f32_e32 v13, v13, v11 ; 101A170D v_mad_f32 v12, -s21, v13, -v12 ; D282000C A4321A15 v_mul_f32_e32 v11, v17, v11 ; 10161711 v_mad_f32 v11, -s22, v11, v12 ; D282000B 24321616 v_subrev_f32_e32 v11, s23, v11 ; 0A161617 v_mul_f32_e32 v11, s24, v11 ; 10161618 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_sub_f32_e32 v13, 1.0, v18 ; 081A24F2 v_mul_f32_e32 v12, v13, v10 ; 1018150D v_mul_f32_e32 v12, v12, v11 ; 1018170C v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v17, 0, -1.0, vcc ; D2000011 01A9E680 v_cndmask_b32_e64 v17, v17, -1.0, vcc ; D2000011 01A9E711 v_cndmask_b32_e64 v17, v17, -1.0, vcc ; D2000011 01A9E711 v_cndmask_b32_e64 v17, v17, -1.0, vcc ; D2000011 01A9E711 v_mul_f32_e32 v18, v5, v5 ; 10240B05 v_mad_f32 v18, v4, v4, v18 ; D2820012 044A0904 v_mad_f32 v18, v3, v3, v18 ; D2820012 044A0703 v_rsq_clamp_f32_e32 v21, v18 ; 7E2A5912 v_mul_f32_e32 v18, v8, v8 ; 10241108 v_mad_f32 v18, v9, v9, v18 ; D2820012 044A1309 v_mad_f32 v18, v2, v2, v18 ; D2820012 044A0502 v_rsq_clamp_f32_e32 v22, v18 ; 7E2C5912 s_buffer_load_dword s20, s[0:3], 0x19 ; C20A0119 v_mul_f32_e32 v18, v14, v14 ; 10241D0E v_mad_f32 v18, v15, v15, v18 ; D2820012 044A1F0F v_mad_f32 v18, v16, v16, v18 ; D2820012 044A2110 v_rsq_clamp_f32_e32 v23, v18 ; 7E2E5912 v_cmpx_le_f32_e32 vcc, 0, v17 ; 7C262280 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 image_sample v17, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[16:19] ; F0800100 00821106 s_buffer_load_dword s9, s[0:3], 0x1c ; C204811C s_buffer_load_dword s8, s[0:3], 0x1d ; C204011D v_mov_b32_e32 v18, 0xbeaaa64c ; 7E2402FF BEAAA64C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v17, s20, v17, v18 ; D2820011 044A2214 s_movk_i32 s10, 0xf00 ; B00A0F00 s_buffer_load_dword s22, s[0:3], s10 ; C20B000A s_buffer_load_dword s23, s[0:3], 0x17 ; C20B8117 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s10, s[0:3], 0x1e ; C205011E s_buffer_load_dword s12, s[0:3], 0x1f ; C206011F s_buffer_load_dword s21, s[0:3], 0x0 ; C20A8100 s_buffer_load_dword s20, s[0:3], 0x1 ; C20A0101 s_buffer_load_dword s19, s[0:3], 0x2 ; C2098102 s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111 s_buffer_load_dword s15, s[0:3], 0x12 ; C2078112 s_buffer_load_dword s24, s[0:3], 0x13 ; C20C0113 s_buffer_load_dword s18, s[0:3], 0x14 ; C2090114 s_buffer_load_dword s17, s[0:3], 0x15 ; C2088115 s_buffer_load_dword s16, s[0:3], 0x16 ; C2080116 v_xor_b32_e32 v18, 0x80000000, v12 ; 3A2418FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v18 ; 7C082480 v_cndmask_b32_e64 v17, 0, v17, vcc ; D2000011 01AA2280 v_cmp_gt_f32_e32 vcc, 0, v17 ; 7C082280 v_cndmask_b32_e64 v17, 0, -1.0, vcc ; D2000011 01A9E680 v_cndmask_b32_e64 v17, v17, -1.0, vcc ; D2000011 01A9E711 v_cndmask_b32_e64 v17, v17, -1.0, vcc ; D2000011 01A9E711 v_cndmask_b32_e64 v17, v17, -1.0, vcc ; D2000011 01A9E711 v_cmpx_le_f32_e32 vcc, 0, v17 ; 7C262280 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v19, s24 ; 7E260218 v_mov_b32_e32 v20, s23 ; 7E280217 v_cmp_ne_i32_e64 s[22:23], 0, s22 ; D10A0016 00002C80 v_mov_b32_e32 v17, v10 ; 7E22030A v_mov_b32_e32 v18, v10 ; 7E24030A s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 v_interp_p1_f32 v17, v0, 3, 5, [m0] ; C8441700 s_buffer_load_dword s36, s[0:3], 0x6 ; C2120106 s_buffer_load_dword s37, s[0:3], 0x7 ; C2128107 s_buffer_load_dword s38, s[0:3], 0x4 ; C2130104 s_buffer_load_dword s39, s[0:3], 0x5 ; C2138105 v_interp_p2_f32 v17, [v17], v1, 3, 5, [m0] ; C8451701 v_interp_p1_f32 v18, v0, 1, 5, [m0] ; C8481500 v_rcp_f32_e32 v17, v17 ; 7E225511 v_interp_p2_f32 v18, [v18], v1, 1, 5, [m0] ; C8491501 v_interp_p1_f32 v0, v0, 0, 5, [m0] ; C8001400 v_interp_p2_f32 v0, [v0], v1, 0, 5, [m0] ; C8011401 v_mul_f32_e32 v0, v0, v17 ; 10002300 v_mul_f32_e32 v1, v18, v17 ; 10022312 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s37 ; 7E220225 v_mad_f32 v17, s38, v0, v17 ; D2820011 04460026 v_mov_b32_e32 v0, s36 ; 7E000224 v_mad_f32 v18, s39, v1, v0 ; D2820012 04020227 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[24:31], s[32:35] ; F0800700 01061811 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v24, v10 ; 10221518 v_mul_f32_e32 v18, v25, v10 ; 10241519 v_mul_f32_e32 v10, v26, v10 ; 1014151A s_or_b64 exec, exec, s[22:23] ; 88FE167E v_min_f32_e32 v0, 0x7f7fffff, v21 ; 1E002AFF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v22 ; 1E022CFF 7F7FFFFF v_mul_f32_e32 v8, v1, v8 ; 10101101 v_mul_f32_e32 v9, v1, v9 ; 10121301 v_min_f32_e32 v21, 0x7f7fffff, v23 ; 1E2A2EFF 7F7FFFFF s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 v_sub_f32_e64 v22, 1.0, s21 ; D2080016 00002AF2 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[28:35], s[0:3] ; F0800700 00071706 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v22, v23, v22 ; 102C2D17 v_sub_f32_e64 v23, 1.0, s20 ; D2080017 000028F2 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_sub_f32_e64 v24, 1.0, s19 ; D2080018 000026F2 v_mul_f32_e32 v24, v25, v24 ; 10303119 image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[36:43], s[24:27] ; F0800700 00C91906 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v20, v25, s18 ; D2820006 004A3314 v_mad_f32 v7, v26, v20, s17 ; D2820007 0046291A v_mad_f32 v20, v27, v20, s16 ; D2820014 0042291B v_mul_f32_e32 v14, v21, v14 ; 101C1D15 v_mul_f32_e32 v15, v21, v15 ; 101E1F15 v_mul_f32_e32 v16, v21, v16 ; 10202115 v_mul_f32_e32 v21, v0, v5 ; 102A0B00 v_mul_f32_e32 v21, v21, v14 ; 102A1D15 v_mul_f32_e32 v25, v0, v4 ; 10320900 v_mad_f32 v21, v15, v25, v21 ; D2820015 0456330F v_mul_f32_e32 v25, v0, v3 ; 10320700 v_mad_f32 v21, v16, v25, v21 ; D2820015 04563310 v_mul_f32_e32 v25, v14, v21 ; 10322B0E v_mad_f32 v25, v21, v14, v25 ; D2820019 04661D15 v_mad_f32 v5, -v5, v0, v25 ; D2820005 24660105 v_mul_f32_e32 v25, v15, v21 ; 10322B0F v_mad_f32 v25, v21, v15, v25 ; D2820019 04661F15 v_mad_f32 v4, -v4, v0, v25 ; D2820004 24660104 v_mul_f32_e32 v25, v16, v21 ; 10322B10 v_mad_f32 v21, v21, v16, v25 ; D2820015 04662115 v_mad_f32 v0, -v3, v0, v21 ; D2820000 24560103 v_mad_f32 v3, v19, v22, s13 ; D2820003 00362D13 v_mad_f32 v21, v23, v19, s14 ; D2820015 003A2717 v_mad_f32 v19, v24, v19, s15 ; D2820013 003E2718 v_mul_f32_e32 v14, v8, v14 ; 101C1D08 v_mad_f32 v14, v15, v9, v14 ; D282000E 043A130F v_mul_f32_e32 v5, v8, v5 ; 100A0B08 v_mad_f32 v4, v4, v9, v5 ; D2820004 04161304 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mad_f32 v2, v16, v1, v14 ; D2820002 043A0310 v_mad_f32 v0, v0, v1, v4 ; D2820000 04120300 v_add_f32_e64 v1, 0, v2 clamp ; D2060801 00020480 v_mul_f32_e32 v2, v3, v1 ; 10040303 v_mul_f32_e32 v3, v21, v1 ; 10060315 v_mul_f32_e32 v4, v19, v1 ; 10080313 v_mov_b32_e32 v5, 0xb58637bd ; 7E0A02FF B58637BD v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v2, 0, vcc ; D2000001 01A90102 v_cndmask_b32_e64 v2, v3, 0, vcc ; D2000002 01A90103 v_cndmask_b32_e64 v3, v4, 0, vcc ; D2000003 01A90104 v_max_f32_e32 v4, 0, v13 ; 20081A80 v_mov_b32_e32 v8, 0x7fffffff ; 7E1002FF 7FFFFFFF v_and_b32_e32 v9, v4, v8 ; 36121104 v_log_f32_e32 v9, v9 ; 7E124F09 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v8, v0, v8 ; 36101100 v_log_f32_e32 v8, v8 ; 7E104F08 v_mul_legacy_f32_e32 v9, s12, v9 ; 0E12120C v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v0, v5, v0 ; 06000105 v_mul_legacy_f32_e32 v5, s11, v8 ; 0E0A100B v_mov_b32_e32 v8, 0x41000000 ; 7E1002FF 41000000 v_add_f32_e32 v8, s11, v8 ; 0610100B v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mul_f32_e32 v5, v5, v8 ; 100A1105 v_exp_f32_e32 v8, v9 ; 7E104B09 v_mul_f32_e32 v5, 0x3d22f983, v5 ; 100A0AFF 3D22F983 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_mul_f32_e32 v5, v5, v20 ; 100A2905 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v6, 0, vcc ; D2000000 01A90106 v_cndmask_b32_e64 v6, v7, 0, vcc ; D2000006 01A90107 v_cndmask_b32_e64 v5, v5, 0, vcc ; D2000005 01A90105 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v4, v8, 0, vcc ; D2000004 01A90108 v_add_f32_e32 v0, v1, v0 ; 06000101 v_add_f32_e32 v1, v2, v6 ; 06020D02 v_add_f32_e32 v2, v3, v5 ; 06040B03 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mul_f32_e32 v0, v0, v17 ; 10002300 v_mul_f32_e32 v1, v1, v18 ; 10022501 v_mul_f32_e32 v2, v2, v10 ; 10041502 v_mul_f32_e32 v0, s9, v0 ; 10000009 v_mul_f32_e32 v1, s8, v1 ; 10020208 v_mul_f32_e32 v2, s10, v2 ; 1004040A v_mul_f32_e32 v0, v0, v11 ; 10001700 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_xor_b32_e32 v3, 0x80000000, v12 ; 3A0618FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cndmask_b32_e64 v1, 0, v2, vcc ; D2000001 01AA0480 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 28 Code Size: 1472 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[5], PERSPECTIVE DCL IN[4], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[0..10] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..4] IMM[0] FLT32 { 0.0000, 2.0000, -0.0000, 1.0000} IMM[1] FLT32 { 15.0000, 0.9151, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[1], |TEMP[0].xxxx| 2: MIN TEMP[0].y, IMM[1].wwww, TEMP[1] 3: ADD TEMP[0].x, -TEMP[0].xxxx, IMM[0].wwww 4: MUL TEMP[0].yzw, TEMP[0].yyyy, IN[3].xxyz 5: DP3 TEMP[0].y, TEMP[0].yzww, -CONST[9] 6: ADD TEMP[0].y, TEMP[0].yyyy, -CONST[10].xxxx 7: MUL_SAT TEMP[0].y, TEMP[0].yyyy, CONST[10].yyyy 8: MUL TEMP[0].y, TEMP[0].yyyy, TEMP[0].yyyy 9: TEX TEMP[2], IN[0], SAMP[2], 2D 10: ADD TEMP[0].z, TEMP[2].xxxx, CONST[8].xxxx 11: MUL_SAT TEMP[0].z, TEMP[0].zzzz, CONST[8].yyyy 12: ADD TEMP[0].w, TEMP[0].zzzz, IMM[0].zzzz 13: POW TEMP[2].x, |TEMP[0].zzzz|, CONST[8].zzzz 14: CMP TEMP[0].z, TEMP[0].wwww, IMM[0].xxxx, TEMP[2].xxxx 15: MUL TEMP[0].w, TEMP[0].zzzz, TEMP[0].xxxx 16: MAX TEMP[2].x, TEMP[0].xxxx, IMM[0].xxxx 17: MUL TEMP[3], TEMP[0].yyyy, TEMP[0].wwww 18: MOV TEMP[4], TEMP[3] 19: KILL_IF TEMP[4] 20: ADD TEMP[0].x, TEMP[2].xxxx, IMM[0].zzzz 21: POW TEMP[0].w, |TEMP[2].xxxx|, CONST[7].wwww 22: CMP TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx, TEMP[0].wwww 23: MOV TEMP[0].w, IMM[0].wwww 24: ADD TEMP[2].xyz, TEMP[0].wwww, -CONST[0] 25: TEX TEMP[4], IN[1], SAMP[0], 2D 26: MUL TEMP[2].xyz, TEMP[2], TEMP[4] 27: MAD TEMP[2].xyz, TEMP[2], CONST[4].wwww, CONST[4] 28: DP3 TEMP[1].x, IN[2], IN[2] 29: RSQ TEMP[1].x, TEMP[1].xxxx 30: MIN TEMP[1].x, IMM[1].wwww, TEMP[1].xxxx 31: MUL TEMP[3].xyz, IN[2], TEMP[1].xxxx 32: MOV_SAT TEMP[0].w, TEMP[3].zzzz 33: MUL TEMP[2].xyz, TEMP[0].wwww, TEMP[2] 34: ADD TEMP[0].w, TEMP[0].wwww, IMM[0].zzzz 35: CMP TEMP[2].xyz, TEMP[0].wwww, IMM[0].xxxx, TEMP[2] 36: DP3 TEMP[1].x, IN[4], IN[4] 37: RSQ TEMP[1].x, TEMP[1].xxxx 38: MIN TEMP[1].x, IMM[1].wwww, TEMP[1].xxxx 39: MUL TEMP[4].xyz, IN[4], TEMP[1].xxxx 40: MAD TEMP[4].xyz, TEMP[4].zzzz, IMM[0].xxyw, -TEMP[4] 41: DP3_SAT TEMP[0].w, TEMP[4], TEMP[3] 42: POW TEMP[2].w, |TEMP[0].wwww|, IMM[1].xxxx 43: ADD TEMP[0].w, TEMP[0].wwww, IMM[0].zzzz 44: MUL TEMP[2].w, TEMP[2].wwww, IMM[1].yyyy 45: MUL TEMP[3].xyz, TEMP[2].wwww, CONST[5] 46: CMP TEMP[3].xyz, TEMP[0].wwww, IMM[0].xxxx, TEMP[3] 47: ADD TEMP[2].xyz, TEMP[2], TEMP[3] 48: MUL TEMP[2].xyz, TEMP[0].xxxx, TEMP[2] 49: MUL TEMP[0].xzw, TEMP[0].zzzz, TEMP[2].xyyz 50: MUL TEMP[0].xzw, TEMP[0], CONST[7].xyyz 51: MUL TEMP[0].xyz, TEMP[0].yyyy, TEMP[0].xzww 52: TEX TEMP[2], IN[1], SAMP[1], 2D 53: MAX TEMP[3].x, TEMP[2].xxxx, CONST[6].yyyy 54: MIN_SAT TEMP[0].w, CONST[6].xxxx, TEMP[3].xxxx 55: CMP OUT[0], -TEMP[3].wwww, TEMP[0], IMM[0].xxxx 56: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %48 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %49 = load <32 x i8>, <32 x i8> addrspace(2)* %48, align 32, !tbaa !0 %50 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %53 = bitcast <8 x i32> addrspace(2)* %52 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %56 = bitcast <4 x i32> addrspace(2)* %55 to <16 x i8> addrspace(2)* %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %59 = bitcast <8 x i32> addrspace(2)* %58 to <32 x i8> addrspace(2)* %60 = load <32 x i8>, <32 x i8> addrspace(2)* %59, align 32, !tbaa !0 %61 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %62 = bitcast <4 x i32> addrspace(2)* %61 to <16 x i8> addrspace(2)* %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %77 = fmul float %71, %71 %78 = fmul float %72, %72 %79 = fadd float %78, %77 %80 = fmul float %73, %73 %81 = fadd float %79, %80 %82 = call float @fabs(float %81) %83 = call float @llvm.AMDGPU.rsq.clamped.f32(float %82) %84 = call float @llvm.minnum.f32(float %83, float 0x47EFFFFFE0000000) %85 = fsub float 1.000000e+00, %81 %86 = fmul float %84, %71 %87 = fmul float %84, %72 %88 = fmul float %84, %73 %89 = fmul float %43, %86 %90 = fsub float -0.000000e+00, %89 %91 = fmul float %44, %87 %92 = fsub float %90, %91 %93 = fmul float %45, %88 %94 = fsub float %92, %93 %95 = fsub float %94, %46 %96 = fmul float %95, %47 %97 = call float @llvm.AMDIL.clamp.(float %96, float 0.000000e+00, float 1.000000e+00) %98 = fmul float %97, %97 %99 = bitcast float %64 to i32 %100 = bitcast float %65 to i32 %101 = insertelement <2 x i32> undef, i32 %99, i32 0 %102 = insertelement <2 x i32> %101, i32 %100, i32 1 %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %60, <16 x i8> %63, i32 2) %104 = extractelement <4 x float> %103, i32 0 %105 = fadd float %104, %40 %106 = fmul float %105, %41 %107 = call float @llvm.AMDIL.clamp.(float %106, float 0.000000e+00, float 1.000000e+00) %108 = fadd float %107, 0xBEB0C6F7A0000000 %109 = call float @fabs(float %107) %110 = call float @llvm.pow.f32(float %109, float %42) %111 = call float @llvm.AMDGPU.cndlt(float %108, float 0.000000e+00, float %110) %112 = fmul float %111, %85 %113 = call float @llvm.maxnum.f32(float %85, float 0.000000e+00) %114 = fmul float %98, %112 %115 = fmul float %98, %112 %116 = fmul float %98, %112 %117 = fmul float %98, %112 %118 = fcmp olt float %114, 0.000000e+00 %119 = fcmp olt float %115, 0.000000e+00 %120 = fcmp olt float %116, 0.000000e+00 %121 = fcmp olt float %117, 0.000000e+00 %122 = or i1 %121, %120 %123 = or i1 %122, %119 %124 = or i1 %123, %118 %125 = select i1 %124, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %125) %126 = fadd float %113, 0xBEB0C6F7A0000000 %127 = call float @fabs(float %113) %128 = call float @llvm.pow.f32(float %127, float %39) %129 = call float @llvm.AMDGPU.cndlt(float %126, float 0.000000e+00, float %128) %130 = fsub float 1.000000e+00, %24 %131 = fsub float 1.000000e+00, %25 %132 = fsub float 1.000000e+00, %26 %133 = bitcast float %66 to i32 %134 = bitcast float %67 to i32 %135 = insertelement <2 x i32> undef, i32 %133, i32 0 %136 = insertelement <2 x i32> %135, i32 %134, i32 1 %137 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %136, <32 x i8> %49, <16 x i8> %51, i32 2) %138 = extractelement <4 x float> %137, i32 0 %139 = extractelement <4 x float> %137, i32 1 %140 = extractelement <4 x float> %137, i32 2 %141 = fmul float %130, %138 %142 = fmul float %131, %139 %143 = fmul float %132, %140 %144 = fmul float %141, %30 %145 = fadd float %144, %27 %146 = fmul float %142, %30 %147 = fadd float %146, %28 %148 = fmul float %143, %30 %149 = fadd float %148, %29 %150 = fmul float %68, %68 %151 = fmul float %69, %69 %152 = fadd float %151, %150 %153 = fmul float %70, %70 %154 = fadd float %152, %153 %155 = call float @llvm.AMDGPU.rsq.clamped.f32(float %154) %156 = call float @llvm.minnum.f32(float %155, float 0x47EFFFFFE0000000) %157 = fmul float %68, %156 %158 = fmul float %69, %156 %159 = fmul float %70, %156 %160 = call float @llvm.AMDIL.clamp.(float %159, float 0.000000e+00, float 1.000000e+00) %161 = fmul float %160, %145 %162 = fmul float %160, %147 %163 = fmul float %160, %149 %164 = fadd float %160, 0xBEB0C6F7A0000000 %165 = call float @llvm.AMDGPU.cndlt(float %164, float 0.000000e+00, float %161) %166 = call float @llvm.AMDGPU.cndlt(float %164, float 0.000000e+00, float %162) %167 = call float @llvm.AMDGPU.cndlt(float %164, float 0.000000e+00, float %163) %168 = fmul float %74, %74 %169 = fmul float %75, %75 %170 = fadd float %169, %168 %171 = fmul float %76, %76 %172 = fadd float %170, %171 %173 = call float @llvm.AMDGPU.rsq.clamped.f32(float %172) %174 = call float @llvm.minnum.f32(float %173, float 0x47EFFFFFE0000000) %175 = fmul float %74, %174 %176 = fmul float %75, %174 %177 = fmul float %76, %174 %178 = fmul float %177, 0.000000e+00 %179 = fsub float %178, %175 %180 = fmul float %177, 0.000000e+00 %181 = fsub float %180, %176 %182 = fmul float %177, 2.000000e+00 %183 = fsub float %182, %177 %184 = fmul float %179, %157 %185 = fmul float %181, %158 %186 = fadd float %185, %184 %187 = fmul float %183, %159 %188 = fadd float %186, %187 %189 = call float @llvm.AMDIL.clamp.(float %188, float 0.000000e+00, float 1.000000e+00) %190 = call float @fabs(float %189) %191 = call float @llvm.pow.f32(float %190, float 1.500000e+01) %192 = fadd float %189, 0xBEB0C6F7A0000000 %193 = fmul float %191, 0x3FED48D5A0000000 %194 = fmul float %193, %31 %195 = fmul float %193, %32 %196 = fmul float %193, %33 %197 = call float @llvm.AMDGPU.cndlt(float %192, float 0.000000e+00, float %194) %198 = call float @llvm.AMDGPU.cndlt(float %192, float 0.000000e+00, float %195) %199 = call float @llvm.AMDGPU.cndlt(float %192, float 0.000000e+00, float %196) %200 = fadd float %165, %197 %201 = fadd float %166, %198 %202 = fadd float %167, %199 %203 = fmul float %129, %200 %204 = fmul float %129, %201 %205 = fmul float %129, %202 %206 = fmul float %111, %203 %207 = fmul float %111, %204 %208 = fmul float %111, %205 %209 = fmul float %206, %36 %210 = fmul float %207, %37 %211 = fmul float %208, %38 %212 = fmul float %98, %209 %213 = fmul float %98, %210 %214 = fmul float %98, %211 %215 = bitcast float %66 to i32 %216 = bitcast float %67 to i32 %217 = insertelement <2 x i32> undef, i32 %215, i32 0 %218 = insertelement <2 x i32> %217, i32 %216, i32 1 %219 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %218, <32 x i8> %54, <16 x i8> %57, i32 2) %220 = extractelement <4 x float> %219, i32 0 %221 = call float @llvm.maxnum.f32(float %220, float %35) %222 = call float @llvm.minnum.f32(float %34, float %221) %223 = call float @llvm.AMDIL.clamp.(float %222, float 0.000000e+00, float 1.000000e+00) %224 = fsub float -0.000000e+00, %117 %225 = call float @llvm.AMDGPU.cndlt(float %224, float %212, float 0.000000e+00) %226 = fsub float -0.000000e+00, %117 %227 = call float @llvm.AMDGPU.cndlt(float %226, float %213, float 0.000000e+00) %228 = fsub float -0.000000e+00, %117 %229 = call float @llvm.AMDGPU.cndlt(float %228, float %214, float 0.000000e+00) %230 = fsub float -0.000000e+00, %117 %231 = call float @llvm.AMDGPU.cndlt(float %230, float %223, float 0.000000e+00) %232 = call i32 @llvm.SI.packf16(float %225, float %227) %233 = bitcast i32 %232 to float %234 = call i32 @llvm.SI.packf16(float %229, float %231) %235 = bitcast i32 %234 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %233, float %235, float %233, float %235) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0xb58637bd ; 7E0402FF B58637BD v_mov_b32_e32 v3, 0x7fffffff ; 7E0602FF 7FFFFFFF v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v14, v0, 0, 4, [m0] ; C8381000 v_interp_p2_f32 v14, [v14], v1, 0, 4, [m0] ; C8391001 v_interp_p1_f32 v15, v0, 1, 4, [m0] ; C83C1100 v_interp_p2_f32 v15, [v15], v1, 1, 4, [m0] ; C83D1101 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 v_interp_p1_f32 v0, v0, 2, 4, [m0] ; C8001200 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[0:3], 0x24 ; C20A0124 v_mul_f32_e32 v16, v11, v11 ; 1020170B v_mad_f32 v16, v12, v12, v16 ; D2820010 0442190C v_mad_f32 v16, v13, v13, v16 ; D2820010 04421B0D s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 v_rsq_clamp_f32_e64 v17, |v16| ; D3580111 00000110 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 v_interp_p2_f32 v0, [v0], v1, 2, 4, [m0] ; C8011201 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430104 v_min_f32_e32 v4, 0x7f7fffff, v17 ; 1E0822FF 7F7FFFFF v_mul_f32_e32 v5, v11, v4 ; 100A090B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s20, v5 ; 100A0A14 s_buffer_load_dword s8, s[0:3], 0x22 ; C2040122 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v1, s21, v1 ; 06020215 s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126 v_mul_f32_e32 v1, s22, v1 ; 10020216 v_mul_f32_e32 v11, v12, v4 ; 1016090C v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_and_b32_e32 v12, v1, v3 ; 36180701 v_log_f32_e32 v12, v12 ; 7E184F0C s_buffer_load_dword s11, s[0:3], 0x28 ; C2058128 s_buffer_load_dword s12, s[0:3], 0x29 ; C2060129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, -s9, v11, -v5 ; D2820005 A4161609 v_mul_f32_e32 v4, v13, v4 ; 1008090D v_mad_f32 v4, -s10, v4, v5 ; D2820004 2416080A v_mul_legacy_f32_e32 v5, s8, v12 ; 0E0A1808 v_add_f32_e32 v1, v2, v1 ; 06020302 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v5, 0, vcc ; D2000001 01A90105 v_subrev_f32_e32 v4, s11, v4 ; 0A08080B v_mul_f32_e32 v4, s12, v4 ; 1008080C v_sub_f32_e32 v5, 1.0, v16 ; 080A20F2 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v11, v5, v1 ; 10160305 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 v_mul_f32_e32 v11, v11, v4 ; 1016090B v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v12, 0, -1.0, vcc ; D200000C 01A9E680 v_cndmask_b32_e64 v12, v12, -1.0, vcc ; D200000C 01A9E70C v_cndmask_b32_e64 v12, v12, -1.0, vcc ; D200000C 01A9E70C v_cndmask_b32_e64 v12, v12, -1.0, vcc ; D200000C 01A9E70C v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_cmpx_le_f32_e32 vcc, 0, v12 ; 7C261880 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[24:31], s[20:23] ; F0800700 00A61006 image_sample v6, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800100 00430606 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_buffer_load_dword s10, s[0:3], 0x12 ; C2050112 s_buffer_load_dword s11, s[0:3], 0x13 ; C2058113 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 s_buffer_load_dword s13, s[0:3], 0x15 ; C2068115 s_buffer_load_dword s14, s[0:3], 0x16 ; C2070116 s_buffer_load_dword s15, s[0:3], 0x18 ; C2078118 s_buffer_load_dword s16, s[0:3], 0x19 ; C2080119 s_buffer_load_dword s17, s[0:3], 0x1c ; C208811C s_buffer_load_dword s18, s[0:3], 0x1d ; C209011D s_buffer_load_dword s0, s[0:3], 0x1e ; C200011E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e64 v7, 1.0, s5 ; D2080007 00000AF2 v_sub_f32_e64 v12, 1.0, s6 ; D208000C 00000CF2 v_sub_f32_e64 v13, 1.0, s7 ; D208000D 00000EF2 v_mul_f32_e32 v7, v16, v7 ; 100E0F10 v_mul_f32_e32 v12, v17, v12 ; 10181911 v_mul_f32_e32 v13, v18, v13 ; 101A1B12 v_mov_b32_e32 v16, s8 ; 7E200208 v_and_b32_e32 v17, v5, v3 ; 36220705 v_log_f32_e32 v17, v17 ; 7E224F11 v_mad_f32 v7, s11, v7, v16 ; D2820007 04420E0B v_mov_b32_e32 v16, s9 ; 7E200209 v_mad_f32 v12, s11, v12, v16 ; D282000C 0442180B v_mov_b32_e32 v16, s10 ; 7E20020A v_mad_f32 v13, s11, v13, v16 ; D282000D 04421A0B v_mul_legacy_f32_e32 v16, s4, v17 ; 0E202204 v_add_f32_e32 v5, v2, v5 ; 060A0B02 v_mul_f32_e32 v17, v14, v14 ; 10221D0E v_mad_f32 v17, v15, v15, v17 ; D2820011 04461F0F v_mad_f32 v17, v0, v0, v17 ; D2820011 04460100 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_exp_f32_e32 v16, v16 ; 7E204B10 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v5, v16, 0, vcc ; D2000005 01A90110 v_min_f32_e32 v16, 0x7f7fffff, v17 ; 1E2022FF 7F7FFFFF v_mul_f32_e32 v17, v16, v0 ; 10220110 v_mad_f32 v18, v0, v16, v17 ; D2820012 04462100 v_mad_f32 v0, -v0, v16, v18 ; D2820000 244A2100 v_mul_f32_e32 v18, v8, v8 ; 10241108 v_mad_f32 v18, v9, v9, v18 ; D2820012 044A1309 v_mad_f32 v18, v10, v10, v18 ; D2820012 044A150A v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mul_f32_e32 v14, v16, v14 ; 101C1D10 v_mul_f32_e32 v15, v16, v15 ; 101E1F10 v_mad_f32 v14, 0, v17, -v14 ; D282000E 843A2280 v_mad_f32 v15, 0, v17, -v15 ; D282000F 843E2280 v_min_f32_e32 v16, 0x7f7fffff, v18 ; 1E2024FF 7F7FFFFF v_mul_f32_e32 v8, v16, v8 ; 10101110 v_mul_f32_e32 v8, v8, v14 ; 10101D08 v_mul_f32_e32 v9, v16, v9 ; 10121310 v_mad_f32 v8, v15, v9, v8 ; D2820008 0422130F v_mul_f32_e32 v9, v16, v10 ; 10121510 v_mad_f32 v0, v0, v9, v8 ; D2820000 04221300 v_add_f32_e64 v8, 0, v9 clamp ; D2060808 00021280 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_mul_f32_e32 v9, v12, v8 ; 1012110C v_mul_f32_e32 v10, v13, v8 ; 1014110D v_add_f32_e32 v8, v2, v8 ; 06101102 v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_cndmask_b32_e64 v7, v7, 0, vcc ; D2000007 01A90107 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v3, v0, v3 ; 36060700 v_log_f32_e32 v3, v3 ; 7E064F03 v_cndmask_b32_e64 v8, v9, 0, vcc ; D2000008 01A90109 v_cndmask_b32_e64 v9, v10, 0, vcc ; D2000009 01A9010A v_add_f32_e32 v0, v2, v0 ; 06000102 v_mul_legacy_f32_e32 v2, 0x41700000, v3 ; 0E0406FF 41700000 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v2, 0x3f6a46ad, v2 ; 100404FF 3F6A46AD v_mul_f32_e32 v3, s12, v2 ; 1006040C v_mul_f32_e32 v10, s13, v2 ; 1014040D v_mul_f32_e32 v2, s14, v2 ; 1004040E v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v3, 0, vcc ; D2000000 01A90103 v_cndmask_b32_e64 v3, v10, 0, vcc ; D2000003 01A9010A v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_add_f32_e32 v0, v0, v7 ; 06000F00 v_add_f32_e32 v3, v3, v8 ; 06061103 v_add_f32_e32 v2, v2, v9 ; 06041302 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mul_f32_e32 v3, v3, v1 ; 10060303 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v0, s17, v0 ; 10000011 v_mul_f32_e32 v2, s18, v3 ; 10040612 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_max_f32_e32 v3, s16, v6 ; 20060C10 v_min_f32_e32 v3, s15, v3 ; 1E06060F v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_xor_b32_e32 v4, 0x80000000, v11 ; 3A0816FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v2, 0, v2, vcc ; D2000002 01AA0480 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cndmask_b32_e64 v3, 0, v3, vcc ; D2000003 01AA0680 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 20 Code Size: 1008 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..11] DCL TEMP[0], LOCAL DCL TEMP[1..6] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.8165, 0.5774, 0.0000, 0.0039} IMM[2] FLT32 { -0.7071, -0.4082, 0.5774, 0.7071} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: TEX TEMP[2], IN[1], SAMP[2], 2D 5: MAD TEMP[2].xyz, TEMP[2], IMM[0].xxxx, IMM[0].yyyy 6: DP3 TEMP[0].x, TEMP[2], TEMP[2] 7: RSQ TEMP[0].x, TEMP[0].xxxx 8: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 9: MUL TEMP[3].xyz, TEMP[2], TEMP[0].xxxx 10: DP3 TEMP[1].w, TEMP[3], TEMP[1] 11: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[3] 12: MAD TEMP[1].xyz, TEMP[2], IMM[0].xxxx, -TEMP[1] 13: DP2 TEMP[0].x, TEMP[1].yzzw, IMM[1] 14: ADD_SAT TEMP[2].x, IMM[1].zzzz, TEMP[0].xxxx 15: DP3_SAT TEMP[2].y, TEMP[1], IMM[2] 16: DP3_SAT TEMP[2].z, TEMP[1].yzxw, IMM[2].yzww 17: MAX TEMP[1].xyz, TEMP[2], IMM[0].wwww 18: LG2 TEMP[0].x, |TEMP[1].xxxx| 19: MAX TEMP[2].x, IMM[3].yyyy, TEMP[0].xxxx 20: LG2 TEMP[0].x, |TEMP[1].yyyy| 21: MAX TEMP[2].y, IMM[3].yyyy, TEMP[0].xxxx 22: LG2 TEMP[0].x, |TEMP[1].zzzz| 23: MAX TEMP[2].z, IMM[3].yyyy, TEMP[0].xxxx 24: MOV TEMP[1].z, IMM[0].zzzz 25: ADD TEMP[1].x, TEMP[1].zzzz, CONST[10].xxxx 26: MUL TEMP[1].xyw, TEMP[2].xyzz, TEMP[1].xxxx 27: EX2 TEMP[2].x, TEMP[1].xxxx 28: EX2 TEMP[2].y, TEMP[1].yyyy 29: EX2 TEMP[2].z, TEMP[1].wwww 30: TEX TEMP[4], IN[0], SAMP[1], 2D 31: MUL TEMP[1].xyw, TEMP[4].xyzz, CONST[7].xyzz 32: DP3 TEMP[2].x, TEMP[1].xyww, TEMP[2] 33: MUL TEMP[2].yzw, CONST[9].xxyz, CONST[9].wwww 34: TEX TEMP[4], IN[1], SAMP[4], 2D 35: MUL TEMP[2].yzw, TEMP[2], TEMP[4].xxxx 36: MAD TEMP[2].yzw, TEMP[2], CONST[5].wwww, CONST[5].xxyz 37: TEX TEMP[4], IN[0], SAMP[0], 2D 38: MUL TEMP[4].xyz, TEMP[4], CONST[6] 39: MUL TEMP[2].yzw, TEMP[2], TEMP[4].xxyz 40: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[2].yzww 41: MUL TEMP[5].xyz, CONST[8], CONST[8].wwww 42: TEX TEMP[6], IN[1], SAMP[3], 2D 43: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 44: ADD TEMP[6].xyz, TEMP[1].zzzz, -CONST[0] 45: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 46: MAD TEMP[5].xyz, TEMP[5], CONST[4].wwww, CONST[4] 47: MUL TEMP[4].xyz, TEMP[4], TEMP[5] 48: DP2 TEMP[0].x, TEMP[3].yzzw, IMM[1] 49: ADD_SAT TEMP[6].x, IMM[1].zzzz, TEMP[0].xxxx 50: DP3_SAT TEMP[6].y, TEMP[3], IMM[2] 51: DP3_SAT TEMP[6].z, TEMP[3].yzxw, IMM[2].yzww 52: MUL TEMP[3].xyz, TEMP[6], TEMP[6] 53: MAX TEMP[6].xyz, TEMP[3], IMM[0].wwww 54: DP3 TEMP[1].x, TEMP[1].xyww, TEMP[6] 55: MAD TEMP[1].xyz, TEMP[4], TEMP[1].xxxx, TEMP[2] 56: ADD TEMP[1].xyz, TEMP[1], CONST[0] 57: MAD OUT[0].xyz, TEMP[5], CONST[11], TEMP[1] 58: MUL OUT[0].w, IMM[1].wwww, IN[2].wwww 59: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %76 = bitcast <8 x i32> addrspace(2)* %75 to <32 x i8> addrspace(2)* %77 = load <32 x i8>, <32 x i8> addrspace(2)* %76, align 32, !tbaa !0 %78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %79 = bitcast <4 x i32> addrspace(2)* %78 to <16 x i8> addrspace(2)* %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %89 = fmul float %86, %86 %90 = fmul float %87, %87 %91 = fadd float %90, %89 %92 = fmul float %88, %88 %93 = fadd float %91, %92 %94 = call float @llvm.AMDGPU.rsq.clamped.f32(float %93) %95 = call float @llvm.minnum.f32(float %94, float 0x47EFFFFFE0000000) %96 = fmul float %86, %95 %97 = fmul float %87, %95 %98 = fmul float %88, %95 %99 = bitcast float %83 to i32 %100 = bitcast float %84 to i32 %101 = insertelement <2 x i32> undef, i32 %99, i32 0 %102 = insertelement <2 x i32> %101, i32 %100, i32 1 %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %65, <16 x i8> %68, i32 2) %104 = extractelement <4 x float> %103, i32 0 %105 = extractelement <4 x float> %103, i32 1 %106 = extractelement <4 x float> %103, i32 2 %107 = fmul float %104, 2.000000e+00 %108 = fadd float %107, -1.000000e+00 %109 = fmul float %105, 2.000000e+00 %110 = fadd float %109, -1.000000e+00 %111 = fmul float %106, 2.000000e+00 %112 = fadd float %111, -1.000000e+00 %113 = fmul float %108, %108 %114 = fmul float %110, %110 %115 = fadd float %114, %113 %116 = fmul float %112, %112 %117 = fadd float %115, %116 %118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117) %119 = call float @llvm.minnum.f32(float %118, float 0x47EFFFFFE0000000) %120 = fmul float %108, %119 %121 = fmul float %110, %119 %122 = fmul float %112, %119 %123 = fmul float %120, %96 %124 = fmul float %121, %97 %125 = fadd float %124, %123 %126 = fmul float %122, %98 %127 = fadd float %125, %126 %128 = fmul float %127, %120 %129 = fmul float %127, %121 %130 = fmul float %127, %122 %131 = fmul float %128, 2.000000e+00 %132 = fsub float %131, %96 %133 = fmul float %129, 2.000000e+00 %134 = fsub float %133, %97 %135 = fmul float %130, 2.000000e+00 %136 = fsub float %135, %98 %137 = fmul float %134, 0x3FEA20BD80000000 %138 = fmul float %136, 0x3FE279A740000000 %139 = fadd float %137, %138 %140 = fadd float %139, 0.000000e+00 %141 = call float @llvm.AMDIL.clamp.(float %140, float 0.000000e+00, float 1.000000e+00) %142 = fmul float %132, 0xBFE6A09E60000000 %143 = fmul float %134, 0xBFDA20BD80000000 %144 = fadd float %143, %142 %145 = fmul float %136, 0x3FE279A740000000 %146 = fadd float %144, %145 %147 = call float @llvm.AMDIL.clamp.(float %146, float 0.000000e+00, float 1.000000e+00) %148 = fmul float %134, 0xBFDA20BD80000000 %149 = fmul float %136, 0x3FE279A740000000 %150 = fadd float %149, %148 %151 = fmul float %132, 0x3FE6A09E60000000 %152 = fadd float %150, %151 %153 = call float @llvm.AMDIL.clamp.(float %152, float 0.000000e+00, float 1.000000e+00) %154 = call float @llvm.maxnum.f32(float %141, float 0x3EB0C6F7A0000000) %155 = call float @llvm.maxnum.f32(float %147, float 0x3EB0C6F7A0000000) %156 = call float @llvm.maxnum.f32(float %153, float 0x3EB0C6F7A0000000) %157 = call float @fabs(float %154) %158 = call float @llvm.log2.f32(float %157) %159 = call float @llvm.maxnum.f32(float %158, float 0xC7EFFFFFE0000000) %160 = call float @fabs(float %155) %161 = call float @llvm.log2.f32(float %160) %162 = call float @llvm.maxnum.f32(float %161, float 0xC7EFFFFFE0000000) %163 = call float @fabs(float %156) %164 = call float @llvm.log2.f32(float %163) %165 = call float @llvm.maxnum.f32(float %164, float 0xC7EFFFFFE0000000) %166 = fadd float %49, 1.000000e+00 %167 = fmul float %159, %166 %168 = fmul float %162, %166 %169 = fmul float %165, %166 %170 = call float @llvm.AMDIL.exp.(float %167) %171 = call float @llvm.AMDIL.exp.(float %168) %172 = call float @llvm.AMDIL.exp.(float %169) %173 = bitcast float %81 to i32 %174 = bitcast float %82 to i32 %175 = insertelement <2 x i32> undef, i32 %173, i32 0 %176 = insertelement <2 x i32> %175, i32 %174, i32 1 %177 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %176, <32 x i8> %59, <16 x i8> %62, i32 2) %178 = extractelement <4 x float> %177, i32 0 %179 = extractelement <4 x float> %177, i32 1 %180 = extractelement <4 x float> %177, i32 2 %181 = fmul float %178, %38 %182 = fmul float %179, %39 %183 = fmul float %180, %40 %184 = fmul float %181, %170 %185 = fmul float %182, %171 %186 = fadd float %185, %184 %187 = fmul float %183, %172 %188 = fadd float %186, %187 %189 = fmul float %45, %48 %190 = fmul float %46, %48 %191 = fmul float %47, %48 %192 = bitcast float %83 to i32 %193 = bitcast float %84 to i32 %194 = insertelement <2 x i32> undef, i32 %192, i32 0 %195 = insertelement <2 x i32> %194, i32 %193, i32 1 %196 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %195, <32 x i8> %77, <16 x i8> %80, i32 2) %197 = extractelement <4 x float> %196, i32 0 %198 = fmul float %189, %197 %199 = fmul float %190, %197 %200 = fmul float %191, %197 %201 = fmul float %198, %34 %202 = fadd float %201, %31 %203 = fmul float %199, %34 %204 = fadd float %203, %32 %205 = fmul float %200, %34 %206 = fadd float %205, %33 %207 = bitcast float %81 to i32 %208 = bitcast float %82 to i32 %209 = insertelement <2 x i32> undef, i32 %207, i32 0 %210 = insertelement <2 x i32> %209, i32 %208, i32 1 %211 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %210, <32 x i8> %54, <16 x i8> %56, i32 2) %212 = extractelement <4 x float> %211, i32 0 %213 = extractelement <4 x float> %211, i32 1 %214 = extractelement <4 x float> %211, i32 2 %215 = fmul float %212, %35 %216 = fmul float %213, %36 %217 = fmul float %214, %37 %218 = fmul float %202, %215 %219 = fmul float %204, %216 %220 = fmul float %206, %217 %221 = fmul float %188, %218 %222 = fmul float %188, %219 %223 = fmul float %188, %220 %224 = fmul float %41, %44 %225 = fmul float %42, %44 %226 = fmul float %43, %44 %227 = bitcast float %83 to i32 %228 = bitcast float %84 to i32 %229 = insertelement <2 x i32> undef, i32 %227, i32 0 %230 = insertelement <2 x i32> %229, i32 %228, i32 1 %231 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %230, <32 x i8> %71, <16 x i8> %74, i32 2) %232 = extractelement <4 x float> %231, i32 0 %233 = extractelement <4 x float> %231, i32 1 %234 = extractelement <4 x float> %231, i32 2 %235 = fmul float %224, %232 %236 = fmul float %225, %233 %237 = fmul float %226, %234 %238 = fsub float 1.000000e+00, %24 %239 = fsub float 1.000000e+00, %25 %240 = fsub float 1.000000e+00, %26 %241 = fmul float %235, %238 %242 = fmul float %236, %239 %243 = fmul float %237, %240 %244 = fmul float %241, %30 %245 = fadd float %244, %27 %246 = fmul float %242, %30 %247 = fadd float %246, %28 %248 = fmul float %243, %30 %249 = fadd float %248, %29 %250 = fmul float %215, %245 %251 = fmul float %216, %247 %252 = fmul float %217, %249 %253 = fmul float %121, 0x3FEA20BD80000000 %254 = fmul float %122, 0x3FE279A740000000 %255 = fadd float %253, %254 %256 = fadd float %255, 0.000000e+00 %257 = call float @llvm.AMDIL.clamp.(float %256, float 0.000000e+00, float 1.000000e+00) %258 = fmul float %120, 0xBFE6A09E60000000 %259 = fmul float %121, 0xBFDA20BD80000000 %260 = fadd float %259, %258 %261 = fmul float %122, 0x3FE279A740000000 %262 = fadd float %260, %261 %263 = call float @llvm.AMDIL.clamp.(float %262, float 0.000000e+00, float 1.000000e+00) %264 = fmul float %121, 0xBFDA20BD80000000 %265 = fmul float %122, 0x3FE279A740000000 %266 = fadd float %265, %264 %267 = fmul float %120, 0x3FE6A09E60000000 %268 = fadd float %266, %267 %269 = call float @llvm.AMDIL.clamp.(float %268, float 0.000000e+00, float 1.000000e+00) %270 = fmul float %257, %257 %271 = fmul float %263, %263 %272 = fmul float %269, %269 %273 = call float @llvm.maxnum.f32(float %270, float 0x3EB0C6F7A0000000) %274 = call float @llvm.maxnum.f32(float %271, float 0x3EB0C6F7A0000000) %275 = call float @llvm.maxnum.f32(float %272, float 0x3EB0C6F7A0000000) %276 = fmul float %181, %273 %277 = fmul float %182, %274 %278 = fadd float %277, %276 %279 = fmul float %183, %275 %280 = fadd float %278, %279 %281 = fmul float %250, %280 %282 = fadd float %281, %221 %283 = fmul float %251, %280 %284 = fadd float %283, %222 %285 = fmul float %252, %280 %286 = fadd float %285, %223 %287 = fadd float %282, %24 %288 = fadd float %284, %25 %289 = fadd float %286, %26 %290 = fmul float %245, %50 %291 = fadd float %290, %287 %292 = fmul float %247, %51 %293 = fadd float %292, %288 %294 = fmul float %249, %52 %295 = fadd float %294, %289 %296 = fmul float %85, 3.906250e-03 %297 = call i32 @llvm.SI.packf16(float %291, float %293) %298 = bitcast i32 %297 to float %299 = call i32 @llvm.SI.packf16(float %295, float %296) %300 = bitcast i32 %299 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %298, float %300, float %298, float %300) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x3f13cd3a ; 7E0402FF 3F13CD3A v_mov_b32_e32 v3, 0x3f5105ec ; 7E0602FF 3F5105EC v_mov_b32_e32 v4, 0xbf3504f3 ; 7E0802FF BF3504F3 v_mov_b32_e32 v5, 0xbed105ec ; 7E0A02FF BED105EC v_mov_b32_e32 v6, 0x3f3504f3 ; 7E0C02FF 3F3504F3 v_mov_b32_e32 v7, 0xff7fffff ; 7E0E02FF FF7FFFFF v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 v_interp_p1_f32 v10, v0, 0, 1, [m0] ; C8280400 v_interp_p2_f32 v10, [v10], v1, 0, 1, [m0] ; C8290401 v_interp_p1_f32 v11, v0, 1, 1, [m0] ; C82C0500 v_interp_p2_f32 v11, [v11], v1, 1, 1, [m0] ; C82D0501 v_interp_p1_f32 v12, v0, 3, 2, [m0] ; C8300B00 v_interp_p2_f32 v12, [v12], v1, 3, 2, [m0] ; C8310B01 v_mul_f32_e32 v12, 0x3b800000, v12 ; 101818FF 3B800000 v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00 v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01 v_interp_p1_f32 v14, v0, 1, 3, [m0] ; C8380D00 v_interp_p2_f32 v14, [v14], v1, 1, 3, [m0] ; C8390D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[48:51], s[4:5], 0x4 ; C0980504 s_load_dwordx4 s[52:55], s[4:5], 0x8 ; C09A0508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[36:39], s[4:5], 0x10 ; C0920510 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[56:63], s[6:7], 0x8 ; C0DC0708 s_load_dwordx8 s[64:71], s[6:7], 0x10 ; C0E00710 s_load_dwordx8 s[28:35], s[6:7], 0x18 ; C0CE0718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D s_buffer_load_dword s11, s[0:3], 0x1e ; C205811E s_buffer_load_dword s72, s[0:3], 0x20 ; C2240120 s_buffer_load_dword s73, s[0:3], 0x21 ; C2248121 s_buffer_load_dword s74, s[0:3], 0x22 ; C2250122 s_buffer_load_dword s75, s[0:3], 0x23 ; C2258123 s_buffer_load_dword s9, s[0:3], 0x24 ; C2048124 s_buffer_load_dword s5, s[0:3], 0x25 ; C2028125 s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126 s_buffer_load_dword s8, s[0:3], 0x27 ; C2040127 s_buffer_load_dword s76, s[0:3], 0x1c ; C226011C s_load_dwordx8 s[40:47], s[6:7], 0x20 ; C0D40720 image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[64:71], s[52:55] ; F0800700 01B00F0A image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[56:63], s[48:51] ; F0800700 018E1208 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mad_f32 v1, 2.0, v15, -1.0 ; D2820001 03CE1EF4 v_mov_b32_e32 v15, s75 ; 7E1E024B v_mul_f32_e32 v15, s73, v15 ; 101E1E49 v_mov_b32_e32 v21, s75 ; 7E2A024B v_mov_b32_e32 v22, s75 ; 7E2C024B v_mul_f32_e32 v22, s72, v22 ; 102C2C48 image_sample v23, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[40:47], s[36:39] ; F0800100 012A170A v_mul_f32_e32 v21, s74, v21 ; 102A2A4A image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[20:27], s[12:15] ; F0800700 00651808 image_sample v[8:10], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[28:35], s[16:19] ; F0800700 0087080A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v8, v22 ; 10102D08 v_mul_f32_e32 v9, v9, v15 ; 10121F09 v_mul_f32_e32 v10, v10, v21 ; 10142B0A v_mad_f32 v11, 2.0, v16, -1.0 ; D282000B 03CE20F4 v_mad_f32 v15, 2.0, v17, -1.0 ; D282000F 03CE22F4 v_mul_f32_e32 v16, s76, v18 ; 1020244C v_mul_f32_e32 v17, s10, v19 ; 1022260A v_mul_f32_e32 v18, s11, v20 ; 1024280B v_mul_f32_e32 v19, v1, v1 ; 10260301 v_mad_f32 v19, v11, v11, v19 ; D2820013 044E170B v_mad_f32 v19, v15, v15, v19 ; D2820013 044E1F0F v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mul_f32_e32 v20, v13, v13 ; 10281B0D v_mad_f32 v20, v14, v14, v20 ; D2820014 04521D0E v_mad_f32 v20, v0, v0, v20 ; D2820014 04520100 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_min_f32_e32 v19, 0x7f7fffff, v19 ; 1E2626FF 7F7FFFFF v_mul_f32_e32 v1, v19, v1 ; 10020313 v_mul_f32_e32 v11, v19, v11 ; 10161713 v_mul_f32_e32 v15, v19, v15 ; 101E1F13 v_min_f32_e32 v19, 0x7f7fffff, v20 ; 1E2628FF 7F7FFFFF v_mul_f32_e32 v20, v19, v13 ; 10281B13 v_mul_f32_e32 v20, v20, v1 ; 10280314 v_mul_f32_e32 v21, v19, v14 ; 102A1D13 v_mad_f32 v20, v11, v21, v20 ; D2820014 04522B0B v_mul_f32_e32 v21, v19, v0 ; 102A0113 v_mad_f32 v20, v15, v21, v20 ; D2820014 04522B0F v_mul_f32_e32 v21, v1, v20 ; 102A2901 v_mad_f32 v21, v20, v1, v21 ; D2820015 04560314 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s7, s[0:3], 0x19 ; C2038119 s_buffer_load_dword s10, s[0:3], 0x1a ; C205011A v_mad_f32 v13, -v13, v19, v21 ; D282000D 2456270D v_mul_f32_e32 v21, v11, v20 ; 102A290B v_mad_f32 v21, v20, v11, v21 ; D2820015 04561714 v_mad_f32 v14, -v14, v19, v21 ; D282000E 2456270E v_mul_f32_e32 v21, v15, v20 ; 102A290F v_mad_f32 v20, v20, v15, v21 ; D2820014 04561F14 v_mad_f32 v0, -v0, v19, v20 ; D2820000 24522700 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v19, s6, v24 ; 10263006 v_mul_f32_e32 v20, s7, v25 ; 10283207 v_mul_f32_e32 v21, s10, v26 ; 102A340A v_mad_f32 v22, v0, v2, 0 ; D2820016 02020500 v_mad_f32 v22, v14, v3, v22 ; D2820016 045A070E v_mul_f32_e32 v24, v4, v13 ; 10301B04 v_mad_f32 v24, v14, v5, v24 ; D2820018 04620B0E v_mul_f32_e32 v14, v5, v14 ; 101C1D05 v_mad_f32 v24, v0, v2, v24 ; D2820018 04620500 v_mad_f32 v0, v0, v2, v14 ; D2820000 043A0500 v_mad_f32 v14, v15, v2, 0 ; D282000E 0202050F v_mad_f32 v3, v11, v3, v14 ; D2820003 043A070B v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mad_f32 v4, v11, v5, v4 ; D2820004 04120B0B v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mad_f32 v4, v15, v2, v4 ; D2820004 0412050F s_buffer_load_dword s6, s[0:3], 0x17 ; C2030117 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s10, s[0:3], 0x15 ; C2050115 s_buffer_load_dword s11, s[0:3], 0x16 ; C2058116 v_mad_f32 v2, v15, v2, v5 ; D2820002 0416050F v_mad_f32 v0, v13, v6, v0 ; D2820000 04020D0D v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mul_f32_e32 v5, s9, v5 ; 100A0A09 v_mov_b32_e32 v11, s8 ; 7E160208 v_mul_f32_e32 v11, s5, v11 ; 10161605 v_mov_b32_e32 v13, s8 ; 7E1A0208 v_mul_f32_e32 v13, s4, v13 ; 101A1A04 v_mul_f32_e32 v5, v23, v5 ; 100A0B17 v_mul_f32_e32 v11, v23, v11 ; 10161717 v_mul_f32_e32 v13, v23, v13 ; 101A1B17 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v14, s7 ; 7E1C0207 v_mad_f32 v5, s6, v5, v14 ; D2820005 043A0A06 v_mov_b32_e32 v14, s10 ; 7E1C020A v_mad_f32 v11, s6, v11, v14 ; D282000B 043A1606 v_mov_b32_e32 v14, s11 ; 7E1C020B v_mad_f32 v13, s6, v13, v14 ; D282000D 043A1A06 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s5, s[0:3], 0x13 ; C2028113 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_buffer_load_dword s10, s[0:3], 0x11 ; C2050111 s_buffer_load_dword s11, s[0:3], 0x28 ; C2058128 s_buffer_load_dword s12, s[0:3], 0x2c ; C206012C s_buffer_load_dword s13, s[0:3], 0x2d ; C206812D s_buffer_load_dword s0, s[0:3], 0x2e ; C200012E v_add_f32_e64 v14, 0, v22 clamp ; D206080E 00022C80 v_add_f32_e64 v15, 0, v24 clamp ; D206080F 00023080 v_max_f32_e32 v14, 0x358637bd, v14 ; 201C1CFF 358637BD v_max_f32_e32 v15, 0x358637bd, v15 ; 201E1EFF 358637BD v_log_f32_e64 v14, |v14| ; D34E010E 0000010E v_log_f32_e64 v15, |v15| ; D34E010F 0000010F v_mad_f32 v1, v1, v6, v2 ; D2820001 040A0D01 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v2, 1.0, s11 ; D2060002 000016F2 v_max_f32_e32 v6, v7, v14 ; 200C1D07 v_max_f32_e32 v14, v7, v15 ; 201C1F07 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mul_f32_e32 v14, v2, v14 ; 101C1D02 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v6, v6, v16 ; 100C2106 v_mad_f32 v6, v17, v14, v6 ; D2820006 041A1D11 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_max_f32_e32 v3, 0x358637bd, v3 ; 200606FF 358637BD v_mul_f32_e32 v3, v3, v16 ; 10062103 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_max_f32_e32 v4, 0x358637bd, v4 ; 200808FF 358637BD v_mad_f32 v3, v17, v4, v3 ; D2820003 040E0911 v_sub_f32_e64 v4, 1.0, s6 ; D2080004 00000CF2 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_sub_f32_e64 v8, 1.0, s7 ; D2080008 00000EF2 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_sub_f32_e64 v9, 1.0, s8 ; D2080009 000010F2 v_mul_f32_e32 v9, v9, v10 ; 10121509 v_mov_b32_e32 v10, s9 ; 7E140209 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mad_f32 v4, s5, v4, v10 ; D2820004 042A0805 v_mov_b32_e32 v10, s10 ; 7E14020A v_mad_f32 v8, s5, v8, v10 ; D2820008 042A1005 v_mov_b32_e32 v10, s4 ; 7E140204 v_mad_f32 v9, s5, v9, v10 ; D2820009 042A1205 v_max_f32_e32 v0, v7, v0 ; 20000107 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v0, v18, v0, v6 ; D2820000 041A0112 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_max_f32_e32 v1, 0x358637bd, v1 ; 200202FF 358637BD v_mad_f32 v1, v18, v1, v3 ; D2820001 040E0312 v_mul_f32_e32 v2, v19, v5 ; 10040B13 v_mul_f32_e32 v3, v20, v11 ; 10061714 v_mul_f32_e32 v5, v21, v13 ; 100A1B15 v_mul_f32_e32 v6, v4, v19 ; 100C2704 v_mul_f32_e32 v7, v8, v20 ; 100E2908 v_mul_f32_e32 v10, v9, v21 ; 10142B09 v_mad_f32 v2, v0, v2, s6 ; D2820002 001A0500 v_mad_f32 v2, v6, v1, v2 ; D2820002 040A0306 v_mad_f32 v3, v0, v3, s7 ; D2820003 001E0700 v_mad_f32 v3, v7, v1, v3 ; D2820003 040E0307 v_mad_f32 v0, v0, v5, s8 ; D2820000 00220B00 v_mad_f32 v0, v10, v1, v0 ; D2820000 0402030A v_mad_f32 v1, v4, s12, v2 ; D2820001 04081904 v_mad_f32 v2, v8, s13, v3 ; D2820002 040C1B08 v_mad_f32 v0, v9, s0, v0 ; D2820000 04000109 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v0, v12 ; 5E001900 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 28 Code Size: 1192 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[5], PERSPECTIVE DCL IN[4], TEXCOORD[6], PERSPECTIVE DCL IN[5], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..7] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[4], IN[4] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[4], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[2], IN[2] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[2], TEMP[0].xxxx 8: TEX TEMP[3], IN[1], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: TEX TEMP[3], IN[0], SAMP[4], 2D 18: ADD TEMP[1].w, TEMP[3].xxxx, CONST[10].xxxx 19: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[10].yyyy 20: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 21: POW TEMP[3].x, |TEMP[1].wwww|, CONST[10].zzzz 22: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].xxxx 23: DP3 TEMP[1].w, IN[3], IN[3] 24: RSQ TEMP[0], |TEMP[1].wwww| 25: MIN TEMP[2].w, IMM[1].wwww, TEMP[0] 26: MUL TEMP[5].xyz, TEMP[2].wwww, IN[3] 27: DP3 TEMP[2].w, TEMP[5], -CONST[11] 28: ADD TEMP[2].w, TEMP[2].wwww, -CONST[12].xxxx 29: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[12].yyyy 30: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 31: ADD TEMP[1].w, -TEMP[1].wwww, -IMM[0].yyyy 32: MUL TEMP[3].w, TEMP[3].zzzz, TEMP[1].wwww 33: MUL TEMP[5], TEMP[2].wwww, TEMP[3].wwww 34: MOV TEMP[6], TEMP[5] 35: KILL_IF TEMP[6] 36: UIF CONST[240].xxxx :42 37: RCP TEMP[3].w, IN[5].wwww 38: MUL TEMP[5].xy, TEMP[3].wwww, IN[5] 39: MAD TEMP[5].xy, TEMP[5], CONST[1], CONST[1].wzzw 40: TEX TEMP[6], TEMP[5], SAMP[0], 2D 41: MUL TEMP[3].xyz, TEMP[3].zzzz, TEMP[6] 42: ENDIF 43: MOV TEMP[5].y, IMM[0].yyyy 44: ADD TEMP[5].xyz, -TEMP[5].yyyy, -CONST[0] 45: MUL TEMP[6].xyz, CONST[6], CONST[6].wwww 46: TEX TEMP[7], IN[1], SAMP[2], 2D 47: MUL TEMP[6].xyz, TEMP[6], TEMP[7] 48: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 49: MAD TEMP[5].xyz, TEMP[5], CONST[4].wwww, CONST[4] 50: MAX TEMP[3].w, TEMP[1].wwww, IMM[0].wwww 51: ADD TEMP[1].w, TEMP[3].wwww, IMM[0].zzzz 52: POW TEMP[4].w, |TEMP[3].wwww|, CONST[9].wwww 53: MUL TEMP[6].xyz, CONST[7], CONST[7].wwww 54: TEX TEMP[7], IN[1], SAMP[3], 2D 55: MUL TEMP[6].xyz, TEMP[6], TEMP[7].xxxx 56: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 57: DP3_SAT TEMP[3].w, TEMP[4], TEMP[2] 58: ADD TEMP[4].x, TEMP[3].wwww, IMM[0].zzzz 59: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 60: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].zzzz 61: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[8].xxxx 62: MOV TEMP[1].x, CONST[8].xxxx 63: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 64: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 65: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 66: MUL TEMP[2].xyz, TEMP[3].wwww, TEMP[5] 67: CMP TEMP[2].xyz, TEMP[4].xxxx, IMM[0].wwww, TEMP[2] 68: MUL TEMP[4].xyz, TEMP[6], TEMP[1].xxxx 69: CMP TEMP[1], TEMP[1].yyyw, IMM[0].wwww, TEMP[4] 70: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 71: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 72: MUL TEMP[1].xyz, TEMP[3], TEMP[1] 73: MUL TEMP[1].xyz, TEMP[1], CONST[9] 74: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 75: CMP OUT[0].xyz, -TEMP[5].wwww, TEMP[1], IMM[0].wwww 76: MOV OUT[0].w, IMM[0].wwww 77: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %57 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %58 = load <32 x i8>, <32 x i8> addrspace(2)* %57, align 32, !tbaa !0 %59 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %74 = bitcast <8 x i32> addrspace(2)* %73 to <32 x i8> addrspace(2)* %75 = load <32 x i8>, <32 x i8> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %77 = bitcast <4 x i32> addrspace(2)* %76 to <16 x i8> addrspace(2)* %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %80 = bitcast <8 x i32> addrspace(2)* %79 to <32 x i8> addrspace(2)* %81 = load <32 x i8>, <32 x i8> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %83 = bitcast <4 x i32> addrspace(2)* %82 to <16 x i8> addrspace(2)* %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %98 = fmul float %95, %95 %99 = fmul float %96, %96 %100 = fadd float %99, %98 %101 = fmul float %97, %97 %102 = fadd float %100, %101 %103 = call float @llvm.AMDGPU.rsq.clamped.f32(float %102) %104 = call float @llvm.minnum.f32(float %103, float 0x47EFFFFFE0000000) %105 = fmul float %95, %104 %106 = fmul float %96, %104 %107 = fmul float %97, %104 %108 = fmul float %89, %89 %109 = fmul float %90, %90 %110 = fadd float %109, %108 %111 = fmul float %91, %91 %112 = fadd float %110, %111 %113 = call float @llvm.AMDGPU.rsq.clamped.f32(float %112) %114 = call float @llvm.minnum.f32(float %113, float 0x47EFFFFFE0000000) %115 = fmul float %89, %114 %116 = fmul float %90, %114 %117 = fmul float %91, %114 %118 = bitcast float %87 to i32 %119 = bitcast float %88 to i32 %120 = insertelement <2 x i32> undef, i32 %118, i32 0 %121 = insertelement <2 x i32> %120, i32 %119, i32 1 %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %63, <16 x i8> %66, i32 2) %123 = extractelement <4 x float> %122, i32 0 %124 = extractelement <4 x float> %122, i32 1 %125 = extractelement <4 x float> %122, i32 2 %126 = fmul float %123, 2.000000e+00 %127 = fadd float %126, -1.000000e+00 %128 = fmul float %124, 2.000000e+00 %129 = fadd float %128, -1.000000e+00 %130 = fmul float %125, 2.000000e+00 %131 = fadd float %130, -1.000000e+00 %132 = fmul float %127, %127 %133 = fmul float %129, %129 %134 = fadd float %133, %132 %135 = fmul float %131, %131 %136 = fadd float %134, %135 %137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136) %138 = call float @llvm.minnum.f32(float %137, float 0x47EFFFFFE0000000) %139 = fmul float %127, %138 %140 = fmul float %129, %138 %141 = fmul float %131, %138 %142 = fmul float %139, %105 %143 = fmul float %140, %106 %144 = fadd float %143, %142 %145 = fmul float %141, %107 %146 = fadd float %144, %145 %147 = fmul float %146, %139 %148 = fmul float %146, %140 %149 = fmul float %146, %141 %150 = fmul float %147, 2.000000e+00 %151 = fsub float %150, %105 %152 = fmul float %148, 2.000000e+00 %153 = fsub float %152, %106 %154 = fmul float %149, 2.000000e+00 %155 = fsub float %154, %107 %156 = bitcast float %85 to i32 %157 = bitcast float %86 to i32 %158 = insertelement <2 x i32> undef, i32 %156, i32 0 %159 = insertelement <2 x i32> %158, i32 %157, i32 1 %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %81, <16 x i8> %84, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = fadd float %161, %48 %163 = fmul float %162, %49 %164 = call float @llvm.AMDIL.clamp.(float %163, float 0.000000e+00, float 1.000000e+00) %165 = fadd float %164, 0xBEB0C6F7A0000000 %166 = call float @fabs(float %164) %167 = call float @llvm.pow.f32(float %166, float %50) %168 = call float @llvm.AMDGPU.cndlt(float %165, float 0.000000e+00, float %167) %169 = call float @llvm.AMDGPU.cndlt(float %165, float 0.000000e+00, float %167) %170 = call float @llvm.AMDGPU.cndlt(float %165, float 0.000000e+00, float %167) %171 = fmul float %92, %92 %172 = fmul float %93, %93 %173 = fadd float %172, %171 %174 = fmul float %94, %94 %175 = fadd float %173, %174 %176 = call float @fabs(float %175) %177 = call float @llvm.AMDGPU.rsq.clamped.f32(float %176) %178 = call float @llvm.minnum.f32(float %177, float 0x47EFFFFFE0000000) %179 = fmul float %178, %92 %180 = fmul float %178, %93 %181 = fmul float %178, %94 %182 = fmul float %51, %179 %183 = fsub float -0.000000e+00, %182 %184 = fmul float %52, %180 %185 = fsub float %183, %184 %186 = fmul float %53, %181 %187 = fsub float %185, %186 %188 = fsub float %187, %54 %189 = fmul float %188, %55 %190 = call float @llvm.AMDIL.clamp.(float %189, float 0.000000e+00, float 1.000000e+00) %191 = fmul float %190, %190 %192 = fsub float 1.000000e+00, %175 %193 = fmul float %170, %192 %194 = fmul float %191, %193 %195 = fmul float %191, %193 %196 = fmul float %191, %193 %197 = fmul float %191, %193 %198 = fcmp olt float %194, 0.000000e+00 %199 = fcmp olt float %195, 0.000000e+00 %200 = fcmp olt float %196, 0.000000e+00 %201 = fcmp olt float %197, 0.000000e+00 %202 = or i1 %201, %200 %203 = or i1 %202, %199 %204 = or i1 %203, %198 %205 = select i1 %204, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %205) %206 = bitcast float %56 to i32 %207 = icmp eq i32 %206, 0 br i1 %207, label %ENDIF, label %IF IF: ; preds = %main_body %208 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %209 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %210 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %211 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %212 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %213 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %214 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %215 = fdiv float 1.000000e+00, %208 %216 = fmul float %215, %210 %217 = fmul float %215, %209 %218 = fmul float %216, %214 %219 = fadd float %218, %211 %220 = fmul float %217, %213 %221 = fadd float %220, %212 %222 = bitcast float %219 to i32 %223 = bitcast float %221 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %58, <16 x i8> %60, i32 2) %227 = extractelement <4 x float> %226, i32 0 %228 = extractelement <4 x float> %226, i32 1 %229 = extractelement <4 x float> %226, i32 2 %230 = fmul float %170, %227 %231 = fmul float %170, %228 %232 = fmul float %170, %229 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp12.0 = phi float [ %230, %IF ], [ %168, %main_body ] %temp13.0 = phi float [ %231, %IF ], [ %169, %main_body ] %temp14.0 = phi float [ %232, %IF ], [ %170, %main_body ] %233 = fsub float 1.000000e+00, %24 %234 = fsub float 1.000000e+00, %25 %235 = fsub float 1.000000e+00, %26 %236 = fmul float %35, %38 %237 = fmul float %36, %38 %238 = fmul float %37, %38 %239 = bitcast float %87 to i32 %240 = bitcast float %88 to i32 %241 = insertelement <2 x i32> undef, i32 %239, i32 0 %242 = insertelement <2 x i32> %241, i32 %240, i32 1 %243 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %242, <32 x i8> %69, <16 x i8> %72, i32 2) %244 = extractelement <4 x float> %243, i32 0 %245 = extractelement <4 x float> %243, i32 1 %246 = extractelement <4 x float> %243, i32 2 %247 = fmul float %236, %244 %248 = fmul float %237, %245 %249 = fmul float %238, %246 %250 = fmul float %233, %247 %251 = fmul float %234, %248 %252 = fmul float %235, %249 %253 = fmul float %250, %30 %254 = fadd float %253, %27 %255 = fmul float %251, %30 %256 = fadd float %255, %28 %257 = fmul float %252, %30 %258 = fadd float %257, %29 %259 = call float @llvm.maxnum.f32(float %192, float 0.000000e+00) %260 = fadd float %259, 0xBEB0C6F7A0000000 %261 = call float @fabs(float %259) %262 = call float @llvm.pow.f32(float %261, float %47) %263 = fmul float %39, %42 %264 = fmul float %40, %42 %265 = fmul float %41, %42 %266 = bitcast float %87 to i32 %267 = bitcast float %88 to i32 %268 = insertelement <2 x i32> undef, i32 %266, i32 0 %269 = insertelement <2 x i32> %268, i32 %267, i32 1 %270 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %269, <32 x i8> %75, <16 x i8> %78, i32 2) %271 = extractelement <4 x float> %270, i32 0 %272 = fmul float %263, %271 %273 = fmul float %264, %271 %274 = fmul float %265, %271 %275 = fmul float %272, %34 %276 = fadd float %275, %31 %277 = fmul float %273, %34 %278 = fadd float %277, %32 %279 = fmul float %274, %34 %280 = fadd float %279, %33 %281 = fmul float %139, %115 %282 = fmul float %140, %116 %283 = fadd float %282, %281 %284 = fmul float %141, %117 %285 = fadd float %283, %284 %286 = call float @llvm.AMDIL.clamp.(float %285, float 0.000000e+00, float 1.000000e+00) %287 = fadd float %286, 0xBEB0C6F7A0000000 %288 = fmul float %151, %115 %289 = fmul float %153, %116 %290 = fadd float %289, %288 %291 = fmul float %155, %117 %292 = fadd float %290, %291 %293 = call float @llvm.AMDIL.clamp.(float %292, float 0.000000e+00, float 1.000000e+00) %294 = fadd float %293, 0xBEB0C6F7A0000000 %295 = call float @fabs(float %293) %296 = call float @llvm.pow.f32(float %295, float %43) %297 = fadd float %43, 8.000000e+00 %298 = fmul float %297, %296 %299 = fmul float %298, 0x3FA45F3060000000 %300 = fmul float %286, %254 %301 = fmul float %286, %256 %302 = fmul float %286, %258 %303 = call float @llvm.AMDGPU.cndlt(float %287, float 0.000000e+00, float %300) %304 = call float @llvm.AMDGPU.cndlt(float %287, float 0.000000e+00, float %301) %305 = call float @llvm.AMDGPU.cndlt(float %287, float 0.000000e+00, float %302) %306 = fmul float %276, %299 %307 = fmul float %278, %299 %308 = fmul float %280, %299 %309 = call float @llvm.AMDGPU.cndlt(float %294, float 0.000000e+00, float %306) %310 = call float @llvm.AMDGPU.cndlt(float %294, float 0.000000e+00, float %307) %311 = call float @llvm.AMDGPU.cndlt(float %294, float 0.000000e+00, float %308) %312 = call float @llvm.AMDGPU.cndlt(float %260, float 0.000000e+00, float %262) %313 = fadd float %309, %303 %314 = fadd float %310, %304 %315 = fadd float %311, %305 %316 = fmul float %312, %313 %317 = fmul float %312, %314 %318 = fmul float %312, %315 %319 = fmul float %temp12.0, %316 %320 = fmul float %temp13.0, %317 %321 = fmul float %temp14.0, %318 %322 = fmul float %319, %44 %323 = fmul float %320, %45 %324 = fmul float %321, %46 %325 = fmul float %191, %322 %326 = fmul float %191, %323 %327 = fmul float %191, %324 %328 = fsub float -0.000000e+00, %197 %329 = call float @llvm.AMDGPU.cndlt(float %328, float %325, float 0.000000e+00) %330 = fsub float -0.000000e+00, %197 %331 = call float @llvm.AMDGPU.cndlt(float %330, float %326, float 0.000000e+00) %332 = fsub float -0.000000e+00, %197 %333 = call float @llvm.AMDGPU.cndlt(float %332, float %327, float 0.000000e+00) %334 = call i32 @llvm.SI.packf16(float %329, float %331) %335 = bitcast i32 %334 to float %336 = call i32 @llvm.SI.packf16(float %333, float 0.000000e+00) %337 = bitcast i32 %336 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %335, float %337, float %335, float %337) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_movk_i32 s0, 0xf00 ; B0000F00 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 v_mov_b32_e32 v10, 0xb58637bd ; 7E1402FF B58637BD v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100 v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s44, s[16:19], s0 ; C2161000 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 s_buffer_load_dword s12, s[16:19], 0x0 ; C2061100 s_buffer_load_dword s11, s[16:19], 0x1 ; C2059101 s_buffer_load_dword s10, s[16:19], 0x2 ; C2051102 s_buffer_load_dword s3, s[16:19], 0x10 ; C2019110 s_buffer_load_dword s8, s[16:19], 0x11 ; C2041111 s_buffer_load_dword s9, s[16:19], 0x12 ; C2049112 s_buffer_load_dword s45, s[16:19], 0x13 ; C2169113 s_buffer_load_dword s1, s[16:19], 0x14 ; C2009114 s_buffer_load_dword s2, s[16:19], 0x15 ; C2011115 s_buffer_load_dword s0, s[16:19], 0x16 ; C2001116 s_buffer_load_dword s46, s[16:19], 0x17 ; C2171117 s_buffer_load_dword s13, s[16:19], 0x18 ; C2069118 s_buffer_load_dword s14, s[16:19], 0x19 ; C2071119 s_buffer_load_dword s15, s[16:19], 0x1a ; C207911A s_buffer_load_dword s47, s[16:19], 0x1b ; C217911B s_buffer_load_dword s48, s[16:19], 0x29 ; C2181129 s_buffer_load_dword s49, s[16:19], 0x2a ; C218912A s_buffer_load_dword s50, s[16:19], 0x2c ; C219112C s_buffer_load_dword s51, s[16:19], 0x2d ; C219912D s_buffer_load_dword s52, s[16:19], 0x2e ; C21A112E v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00 v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01 v_interp_p1_f32 v14, v0, 1, 3, [m0] ; C8380D00 v_interp_p2_f32 v14, [v14], v1, 1, 3, [m0] ; C8390D01 v_interp_p1_f32 v15, v0, 2, 3, [m0] ; C83C0E00 v_interp_p2_f32 v15, [v15], v1, 2, 3, [m0] ; C83D0E01 v_interp_p1_f32 v6, v0, 0, 4, [m0] ; C8181000 v_interp_p2_f32 v6, [v6], v1, 0, 4, [m0] ; C8191001 v_interp_p1_f32 v5, v0, 1, 4, [m0] ; C8141100 v_interp_p2_f32 v5, [v5], v1, 1, 4, [m0] ; C8151101 v_interp_p1_f32 v4, v0, 2, 4, [m0] ; C8101200 v_interp_p2_f32 v4, [v4], v1, 2, 4, [m0] ; C8111201 s_buffer_load_dword s53, s[16:19], 0x28 ; C21A9128 s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[20:23] ; F0800700 00A71002 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[36:43], s[24:27] ; F0800100 00C90B0B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v11, s53, v11 ; 06161635 v_mul_f32_e32 v11, s48, v11 ; 10161630 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_add_f32_e32 v10, v11, v10 ; 0614150B v_and_b32_e32 v11, 0x7fffffff, v11 ; 361616FF 7FFFFFFF v_log_f32_e32 v11, v11 ; 7E164F0B v_mad_f32 v19, 2.0, v16, -1.0 ; D2820013 03CE20F4 v_mad_f32 v20, 2.0, v17, -1.0 ; D2820014 03CE22F4 v_mad_f32 v21, 2.0, v18, -1.0 ; D2820015 03CE24F4 v_mul_legacy_f32_e32 v11, s49, v11 ; 0E161631 v_exp_f32_e32 v11, v11 ; 7E164B0B v_mul_f32_e32 v12, v13, v13 ; 10181B0D v_mad_f32 v12, v14, v14, v12 ; D282000C 04321D0E v_mad_f32 v12, v15, v15, v12 ; D282000C 04321F0F v_rsq_clamp_f32_e64 v16, |v12| ; D3580110 0000010C s_buffer_load_dword s20, s[16:19], 0x30 ; C20A1130 s_buffer_load_dword s21, s[16:19], 0x31 ; C20A9131 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v10, v11, 0, vcc ; D200000A 01A9010B v_min_f32_e32 v11, 0x7f7fffff, v16 ; 1E1620FF 7F7FFFFF v_mul_f32_e32 v13, v13, v11 ; 101A170D v_mul_f32_e32 v13, s50, v13 ; 101A1A32 v_mul_f32_e32 v14, v14, v11 ; 101C170E v_mad_f32 v13, -s51, v14, -v13 ; D282000D A4361C33 v_mul_f32_e32 v11, v15, v11 ; 1016170F v_mad_f32 v11, -s52, v11, v13 ; D282000B 24361634 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v11, s20, v11 ; 0A161614 v_mul_f32_e32 v11, s21, v11 ; 10161615 v_mul_f32_e32 v13, v6, v6 ; 101A0D06 v_mad_f32 v13, v5, v5, v13 ; D282000D 04360B05 v_mad_f32 v13, v4, v4, v13 ; D282000D 04360904 v_rsq_clamp_f32_e32 v22, v13 ; 7E2C590D v_mul_f32_e32 v13, v7, v7 ; 101A0F07 v_mad_f32 v13, v8, v8, v13 ; D282000D 04361108 v_mad_f32 v13, v9, v9, v13 ; D282000D 04361309 v_rsq_clamp_f32_e32 v23, v13 ; 7E2E590D v_mul_f32_e32 v13, v19, v19 ; 101A2713 v_mad_f32 v13, v20, v20, v13 ; D282000D 04362914 v_mad_f32 v13, v21, v21, v13 ; D282000D 04362B15 v_rsq_clamp_f32_e32 v24, v13 ; 7E30590D v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2 s_buffer_load_dword s22, s[16:19], 0x24 ; C20B1124 s_buffer_load_dword s21, s[16:19], 0x25 ; C20A9125 s_buffer_load_dword s20, s[16:19], 0x26 ; C20A1126 s_buffer_load_dword s24, s[16:19], 0x27 ; C20C1127 s_buffer_load_dword s25, s[16:19], 0x1c ; C20C911C s_buffer_load_dword s26, s[16:19], 0x1d ; C20D111D s_buffer_load_dword s27, s[16:19], 0x1e ; C20D911E s_buffer_load_dword s28, s[16:19], 0x1f ; C20E111F s_buffer_load_dword s23, s[16:19], 0x20 ; C20B9120 v_mul_f32_e32 v12, v13, v10 ; 1018150D v_mul_f32_e32 v12, v12, v11 ; 1018170C v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v14, 0, -1.0, vcc ; D200000E 01A9E680 v_cndmask_b32_e64 v14, v14, -1.0, vcc ; D200000E 01A9E70E v_cndmask_b32_e64 v14, v14, -1.0, vcc ; D200000E 01A9E70E v_cndmask_b32_e64 v14, v14, -1.0, vcc ; D200000E 01A9E70E v_cmpx_le_f32_e32 vcc, 0, v14 ; 7C261C80 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v17, s45 ; 7E22022D v_mov_b32_e32 v16, s46 ; 7E20022E v_cmp_ne_i32_e64 s[30:31], 0, s44 ; D10A001E 00005880 v_mov_b32_e32 v25, s47 ; 7E32022F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v18, s28 ; 7E24021C v_mov_b32_e32 v14, v10 ; 7E1C030A v_mov_b32_e32 v15, v10 ; 7E1E030A s_and_saveexec_b64 s[28:29], s[30:31] ; BE9C241E s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx4 s[40:43], s[4:5], 0x0 ; C0940500 v_interp_p1_f32 v14, v0, 3, 5, [m0] ; C8381700 s_buffer_load_dword s30, s[16:19], 0x6 ; C20F1106 s_buffer_load_dword s31, s[16:19], 0x7 ; C20F9107 s_buffer_load_dword s44, s[16:19], 0x4 ; C2161104 s_buffer_load_dword s45, s[16:19], 0x5 ; C2169105 v_interp_p2_f32 v14, [v14], v1, 3, 5, [m0] ; C8391701 v_interp_p1_f32 v15, v0, 1, 5, [m0] ; C83C1500 v_rcp_f32_e32 v14, v14 ; 7E1C550E v_interp_p2_f32 v15, [v15], v1, 1, 5, [m0] ; C83D1501 v_interp_p1_f32 v0, v0, 0, 5, [m0] ; C8001400 v_interp_p2_f32 v0, [v0], v1, 0, 5, [m0] ; C8011401 v_mul_f32_e32 v0, v0, v14 ; 10001D00 v_mul_f32_e32 v1, v15, v14 ; 10021D0F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v14, s31 ; 7E1C021F v_mad_f32 v14, s44, v0, v14 ; D282000E 043A002C v_mov_b32_e32 v0, s30 ; 7E00021E v_mad_f32 v15, s45, v1, v0 ; D282000F 0402022D image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[32:39], s[40:43] ; F0800700 01481A0E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v14, v26, v10 ; 101C151A v_mul_f32_e32 v15, v27, v10 ; 101E151B v_mul_f32_e32 v10, v28, v10 ; 1014151C s_or_b64 exec, exec, s[28:29] ; 88FE1C7E v_min_f32_e32 v0, 0x7f7fffff, v22 ; 1E002CFF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v23 ; 1E022EFF 7F7FFFFF v_mul_f32_e32 v7, v1, v7 ; 100E0F01 v_mul_f32_e32 v8, v1, v8 ; 10101101 v_mul_f32_e32 v1, v1, v9 ; 10021301 v_min_f32_e32 v9, 0x7f7fffff, v24 ; 1E1230FF 7F7FFFFF v_mul_f32_e32 v19, v9, v19 ; 10262709 v_mul_f32_e32 v20, v9, v20 ; 10282909 v_mul_f32_e32 v9, v9, v21 ; 10122B09 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx8 s[28:35], s[6:7], 0x18 ; C0CE0718 v_mul_f32_e32 v21, s13, v25 ; 102A320D v_mul_f32_e32 v22, s14, v25 ; 102C320E v_mul_f32_e32 v23, s15, v25 ; 102E320F v_mul_f32_e32 v24, v0, v6 ; 10300D00 v_mul_f32_e32 v24, v24, v19 ; 10302718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[36:39] ; F0800700 012A1902 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v25, v21 ; 102A2B19 v_mul_f32_e32 v22, v26, v22 ; 102C2D1A v_mul_f32_e32 v23, v27, v23 ; 102E2F1B v_mul_f32_e32 v25, v0, v5 ; 10320B00 v_mad_f32 v24, v20, v25, v24 ; D2820018 04623314 v_mul_f32_e32 v25, v0, v4 ; 10320900 v_mad_f32 v24, v9, v25, v24 ; D2820018 04623309 v_mul_f32_e32 v25, v19, v24 ; 10323113 v_mad_f32 v25, v24, v19, v25 ; D2820019 04662718 v_mad_f32 v6, -v6, v0, v25 ; D2820006 24660106 v_mul_f32_e32 v25, v20, v24 ; 10323114 v_mad_f32 v25, v24, v20, v25 ; D2820019 04662918 v_mad_f32 v5, -v5, v0, v25 ; D2820005 24660105 v_mul_f32_e32 v25, v9, v24 ; 10323109 v_mad_f32 v24, v24, v9, v25 ; D2820018 04661318 v_mad_f32 v0, -v4, v0, v24 ; D2820000 24620104 v_sub_f32_e64 v4, 1.0, s12 ; D2080004 000018F2 v_mul_f32_e32 v4, v21, v4 ; 10080915 v_sub_f32_e64 v21, 1.0, s11 ; D2080015 000016F2 v_mul_f32_e32 v21, v22, v21 ; 102A2B16 v_sub_f32_e64 v22, 1.0, s10 ; D2080016 000014F2 v_mul_f32_e32 v22, v23, v22 ; 102C2D17 image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[16:19] ; F0800100 00870202 v_mad_f32 v3, v17, v4, s3 ; D2820003 000E0911 v_mad_f32 v4, v21, v17, s8 ; D2820004 00222315 v_mad_f32 v17, v22, v17, s9 ; D2820011 00262316 v_mul_f32_e32 v21, s25, v18 ; 102A2419 v_mul_f32_e32 v22, s26, v18 ; 102C241A v_mul_f32_e32 v18, s27, v18 ; 1024241B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v2, v21 ; 102A2B02 v_mul_f32_e32 v22, v2, v22 ; 102C2D02 v_mul_f32_e32 v2, v2, v18 ; 10042502 v_mad_f32 v18, v16, v21, s1 ; D2820012 00062B10 v_mad_f32 v21, v22, v16, s2 ; D2820015 000A2116 v_mad_f32 v2, v2, v16, s0 ; D2820002 00022102 v_mul_f32_e32 v16, v7, v19 ; 10202707 v_mad_f32 v16, v20, v8, v16 ; D2820010 04421114 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mad_f32 v5, v5, v8, v6 ; D2820005 041A1105 v_mad_f32 v6, v9, v1, v16 ; D2820006 04420309 v_mad_f32 v0, v0, v1, v5 ; D2820000 04160300 v_add_f32_e64 v1, 0, v6 clamp ; D2060801 00020C80 v_mul_f32_e32 v3, v3, v1 ; 10060303 v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mul_f32_e32 v5, v17, v1 ; 100A0311 v_mov_b32_e32 v6, 0xb58637bd ; 7E0C02FF B58637BD v_add_f32_e32 v1, v6, v1 ; 06020306 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v3, 0, vcc ; D2000001 01A90103 v_cndmask_b32_e64 v3, v4, 0, vcc ; D2000003 01A90104 v_cndmask_b32_e64 v4, v5, 0, vcc ; D2000004 01A90105 v_max_f32_e32 v5, 0, v13 ; 200A1A80 v_mov_b32_e32 v7, 0x7fffffff ; 7E0E02FF 7FFFFFFF v_and_b32_e32 v8, v5, v7 ; 36100F05 v_log_f32_e32 v8, v8 ; 7E104F08 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v7, v0, v7 ; 360E0F00 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_mul_legacy_f32_e32 v8, s24, v8 ; 0E101018 v_add_f32_e32 v5, v6, v5 ; 060A0B06 v_add_f32_e32 v0, v6, v0 ; 06000106 v_mul_legacy_f32_e32 v6, s23, v7 ; 0E0C0E17 v_mov_b32_e32 v7, 0x41000000 ; 7E0E02FF 41000000 v_add_f32_e32 v7, s23, v7 ; 060E0E17 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_exp_f32_e32 v7, v8 ; 7E0E4B08 v_mul_f32_e32 v6, 0x3d22f983, v6 ; 100C0CFF 3D22F983 v_mul_f32_e32 v8, v6, v18 ; 10102506 v_mul_f32_e32 v9, v6, v21 ; 10122B06 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v8, 0, vcc ; D2000000 01A90108 v_cndmask_b32_e64 v6, v9, 0, vcc ; D2000006 01A90109 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v5, v7, 0, vcc ; D2000005 01A90107 v_add_f32_e32 v0, v1, v0 ; 06000101 v_add_f32_e32 v1, v3, v6 ; 06020D03 v_add_f32_e32 v2, v4, v2 ; 06040504 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v0, v0, v14 ; 10001D00 v_mul_f32_e32 v1, v1, v15 ; 10021F01 v_mul_f32_e32 v2, v2, v10 ; 10041502 v_mul_f32_e32 v0, s22, v0 ; 10000016 v_mul_f32_e32 v1, s21, v1 ; 10020215 v_mul_f32_e32 v2, s20, v2 ; 10040414 v_mul_f32_e32 v0, v0, v11 ; 10001700 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_xor_b32_e32 v3, 0x80000000, v12 ; 3A0618FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cndmask_b32_e64 v1, 0, v2, vcc ; D2000001 01AA0480 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 32 Code Size: 1448 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[1], PERSPECTIVE DCL IN[4], TEXCOORD[4], PERSPECTIVE DCL IN[5], TEXCOORD[5], PERSPECTIVE DCL IN[6], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..12] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..5] IMM[0] FLT32 { 0.0000, 2.0000, -0.0000, 1.0000} IMM[1] FLT32 { 15.0000, 0.9151, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[5], IN[5] 1: RSQ TEMP[1], |TEMP[0].xxxx| 2: MIN TEMP[0].y, IMM[1].wwww, TEMP[1] 3: ADD TEMP[0].x, -TEMP[0].xxxx, IMM[0].wwww 4: MUL TEMP[0].yzw, TEMP[0].yyyy, IN[5].xxyz 5: DP3 TEMP[0].y, TEMP[0].yzww, -CONST[11] 6: ADD TEMP[0].y, TEMP[0].yyyy, -CONST[12].xxxx 7: MUL_SAT TEMP[0].y, TEMP[0].yyyy, CONST[12].yyyy 8: MUL TEMP[0].y, TEMP[0].yyyy, TEMP[0].yyyy 9: TEX TEMP[2], IN[2], SAMP[5], 2D 10: ADD TEMP[0].z, TEMP[2].xxxx, CONST[10].xxxx 11: MUL_SAT TEMP[0].z, TEMP[0].zzzz, CONST[10].yyyy 12: ADD TEMP[0].w, TEMP[0].zzzz, IMM[0].zzzz 13: POW TEMP[2].x, |TEMP[0].zzzz|, CONST[10].zzzz 14: CMP TEMP[0].z, TEMP[0].wwww, IMM[0].xxxx, TEMP[2].xxxx 15: MUL TEMP[0].w, TEMP[0].zzzz, TEMP[0].xxxx 16: MAX TEMP[2].x, TEMP[0].xxxx, IMM[0].xxxx 17: MUL TEMP[3], TEMP[0].yyyy, TEMP[0].wwww 18: MOV TEMP[4], TEMP[3] 19: KILL_IF TEMP[4] 20: ADD TEMP[0].x, TEMP[2].xxxx, IMM[0].zzzz 21: POW TEMP[0].w, |TEMP[2].xxxx|, CONST[9].wwww 22: CMP TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx, TEMP[0].wwww 23: DP3 TEMP[1].x, IN[1], IN[1] 24: RSQ TEMP[1].x, TEMP[1].xxxx 25: MIN TEMP[1].x, IMM[1].wwww, TEMP[1].xxxx 26: MUL TEMP[2].xyz, IN[1], TEMP[1].xxxx 27: DP3 TEMP[1].x, IN[0], IN[0] 28: RSQ TEMP[1].x, TEMP[1].xxxx 29: MIN TEMP[1].x, IMM[1].wwww, TEMP[1].xxxx 30: MUL TEMP[3].xyz, IN[0], TEMP[1].xxxx 31: MUL TEMP[4].xyz, TEMP[2].zxyw, TEMP[3].yzxw 32: MAD TEMP[4].xyz, TEMP[2].yzxw, TEMP[3].zxyw, -TEMP[4] 33: MUL TEMP[4].xyz, TEMP[4], IN[1].wwww 34: DP3 TEMP[1].x, IN[6], IN[6] 35: RSQ TEMP[1].x, TEMP[1].xxxx 36: MIN TEMP[1].x, IMM[1].wwww, TEMP[1].xxxx 37: MUL TEMP[5].xyz, IN[6], TEMP[1].xxxx 38: MAD TEMP[5].xyz, TEMP[5].zzzz, IMM[0].xxyw, -TEMP[5] 39: DP3 TEMP[4].y, TEMP[4], TEMP[5] 40: DP3 TEMP[4].x, TEMP[3], TEMP[5] 41: DP3 TEMP[4].z, TEMP[2], TEMP[5] 42: TEX TEMP[2], TEMP[4], SAMP[4], CUBE 43: MUL TEMP[3].xyz, CONST[6], CONST[6].wwww 44: MUL TEMP[2].xyz, TEMP[2], TEMP[3] 45: MUL TEMP[3].xyz, CONST[7], CONST[7].wwww 46: TEX TEMP[4], IN[3], SAMP[1], 2D 47: MUL TEMP[3].xyz, TEMP[3], TEMP[4] 48: TEX TEMP[4], IN[3], SAMP[0], 2D 49: MAD TEMP[2].xyz, TEMP[4].xxxx, TEMP[2], TEMP[3] 50: MOV TEMP[0].w, IMM[0].wwww 51: ADD TEMP[3].xyz, TEMP[0].wwww, -CONST[0] 52: MUL TEMP[2].xyz, TEMP[2], TEMP[3] 53: MAD TEMP[2].xyz, TEMP[2], CONST[4].wwww, CONST[4] 54: DP3 TEMP[1].x, IN[4], IN[4] 55: RSQ TEMP[1].x, TEMP[1].xxxx 56: MIN TEMP[1].x, IMM[1].wwww, TEMP[1].xxxx 57: MUL TEMP[3].xyz, IN[4], TEMP[1].xxxx 58: MOV_SAT TEMP[0].w, TEMP[3].zzzz 59: DP3_SAT TEMP[2].w, TEMP[5], TEMP[3] 60: MUL TEMP[2].xyz, TEMP[0].wwww, TEMP[2] 61: ADD TEMP[0].w, TEMP[0].wwww, IMM[0].zzzz 62: CMP TEMP[2].xyz, TEMP[0].wwww, IMM[0].xxxx, TEMP[2] 63: POW TEMP[0].w, |TEMP[2].wwww|, IMM[1].xxxx 64: ADD TEMP[2].w, TEMP[2].wwww, IMM[0].zzzz 65: MUL TEMP[0].w, TEMP[0].wwww, IMM[1].yyyy 66: TEX TEMP[4], IN[3], SAMP[2], 2D 67: MAD TEMP[3].xyz, TEMP[4], CONST[5].wwww, CONST[5] 68: MUL TEMP[3].xyz, TEMP[0].wwww, TEMP[3] 69: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].xxxx, TEMP[3] 70: ADD TEMP[2].xyz, TEMP[2], TEMP[3] 71: MUL TEMP[2].xyz, TEMP[0].xxxx, TEMP[2] 72: MUL TEMP[0].xzw, TEMP[0].zzzz, TEMP[2].xyyz 73: MUL TEMP[0].xzw, TEMP[0], CONST[9].xyyz 74: MUL TEMP[0].xyz, TEMP[0].yyyy, TEMP[0].xzww 75: TEX TEMP[2], IN[3], SAMP[3], 2D 76: MAX TEMP[3].x, TEMP[2].xxxx, CONST[8].yyyy 77: MIN_SAT TEMP[0].w, CONST[8].xxxx, TEMP[3].xxxx 78: CMP OUT[0], -TEMP[3].wwww, TEMP[0], IMM[0].xxxx 79: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %57 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %58 = load <32 x i8>, <32 x i8> addrspace(2)* %57, align 32, !tbaa !0 %59 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %74 = bitcast <8 x i32> addrspace(2)* %73 to <32 x i8> addrspace(2)* %75 = load <32 x i8>, <32 x i8> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %77 = bitcast <4 x i32> addrspace(2)* %76 to <16 x i8> addrspace(2)* %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %80 = bitcast <8 x i32> addrspace(2)* %79 to <32 x i8> addrspace(2)* %81 = load <32 x i8>, <32 x i8> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %83 = bitcast <4 x i32> addrspace(2)* %82 to <16 x i8> addrspace(2)* %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %86 = bitcast <8 x i32> addrspace(2)* %85 to <32 x i8> addrspace(2)* %87 = load <32 x i8>, <32 x i8> addrspace(2)* %86, align 32, !tbaa !0 %88 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %89 = bitcast <4 x i32> addrspace(2)* %88 to <16 x i8> addrspace(2)* %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %111 = fmul float %105, %105 %112 = fmul float %106, %106 %113 = fadd float %112, %111 %114 = fmul float %107, %107 %115 = fadd float %113, %114 %116 = call float @fabs(float %115) %117 = call float @llvm.AMDGPU.rsq.clamped.f32(float %116) %118 = call float @llvm.minnum.f32(float %117, float 0x47EFFFFFE0000000) %119 = fsub float 1.000000e+00, %115 %120 = fmul float %118, %105 %121 = fmul float %118, %106 %122 = fmul float %118, %107 %123 = fmul float %52, %120 %124 = fsub float -0.000000e+00, %123 %125 = fmul float %53, %121 %126 = fsub float %124, %125 %127 = fmul float %54, %122 %128 = fsub float %126, %127 %129 = fsub float %128, %55 %130 = fmul float %129, %56 %131 = call float @llvm.AMDIL.clamp.(float %130, float 0.000000e+00, float 1.000000e+00) %132 = fmul float %131, %131 %133 = bitcast float %98 to i32 %134 = bitcast float %99 to i32 %135 = insertelement <2 x i32> undef, i32 %133, i32 0 %136 = insertelement <2 x i32> %135, i32 %134, i32 1 %137 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %136, <32 x i8> %87, <16 x i8> %90, i32 2) %138 = extractelement <4 x float> %137, i32 0 %139 = fadd float %138, %49 %140 = fmul float %139, %50 %141 = call float @llvm.AMDIL.clamp.(float %140, float 0.000000e+00, float 1.000000e+00) %142 = fadd float %141, 0xBEB0C6F7A0000000 %143 = call float @fabs(float %141) %144 = call float @llvm.pow.f32(float %143, float %51) %145 = call float @llvm.AMDGPU.cndlt(float %142, float 0.000000e+00, float %144) %146 = fmul float %145, %119 %147 = call float @llvm.maxnum.f32(float %119, float 0.000000e+00) %148 = fmul float %132, %146 %149 = fmul float %132, %146 %150 = fmul float %132, %146 %151 = fmul float %132, %146 %152 = fcmp olt float %148, 0.000000e+00 %153 = fcmp olt float %149, 0.000000e+00 %154 = fcmp olt float %150, 0.000000e+00 %155 = fcmp olt float %151, 0.000000e+00 %156 = or i1 %155, %154 %157 = or i1 %156, %153 %158 = or i1 %157, %152 %159 = select i1 %158, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %159) %160 = fadd float %147, 0xBEB0C6F7A0000000 %161 = call float @fabs(float %147) %162 = call float @llvm.pow.f32(float %161, float %48) %163 = call float @llvm.AMDGPU.cndlt(float %160, float 0.000000e+00, float %162) %164 = fmul float %94, %94 %165 = fmul float %95, %95 %166 = fadd float %165, %164 %167 = fmul float %96, %96 %168 = fadd float %166, %167 %169 = call float @llvm.AMDGPU.rsq.clamped.f32(float %168) %170 = call float @llvm.minnum.f32(float %169, float 0x47EFFFFFE0000000) %171 = fmul float %94, %170 %172 = fmul float %95, %170 %173 = fmul float %96, %170 %174 = fmul float %91, %91 %175 = fmul float %92, %92 %176 = fadd float %175, %174 %177 = fmul float %93, %93 %178 = fadd float %176, %177 %179 = call float @llvm.AMDGPU.rsq.clamped.f32(float %178) %180 = call float @llvm.minnum.f32(float %179, float 0x47EFFFFFE0000000) %181 = fmul float %91, %180 %182 = fmul float %92, %180 %183 = fmul float %93, %180 %184 = fmul float %173, %182 %185 = fmul float %171, %183 %186 = fmul float %172, %181 %187 = fmul float %172, %183 %188 = fsub float %187, %184 %189 = fmul float %173, %181 %190 = fsub float %189, %185 %191 = fmul float %171, %182 %192 = fsub float %191, %186 %193 = fmul float %188, %97 %194 = fmul float %190, %97 %195 = fmul float %192, %97 %196 = fmul float %108, %108 %197 = fmul float %109, %109 %198 = fadd float %197, %196 %199 = fmul float %110, %110 %200 = fadd float %198, %199 %201 = call float @llvm.AMDGPU.rsq.clamped.f32(float %200) %202 = call float @llvm.minnum.f32(float %201, float 0x47EFFFFFE0000000) %203 = fmul float %108, %202 %204 = fmul float %109, %202 %205 = fmul float %110, %202 %206 = fmul float %205, 0.000000e+00 %207 = fsub float %206, %203 %208 = fmul float %205, 0.000000e+00 %209 = fsub float %208, %204 %210 = fmul float %205, 2.000000e+00 %211 = fsub float %210, %205 %212 = fmul float %193, %207 %213 = fmul float %194, %209 %214 = fadd float %213, %212 %215 = fmul float %195, %211 %216 = fadd float %214, %215 %217 = fmul float %181, %207 %218 = fmul float %182, %209 %219 = fadd float %218, %217 %220 = fmul float %183, %211 %221 = fadd float %219, %220 %222 = fmul float %171, %207 %223 = fmul float %172, %209 %224 = fadd float %223, %222 %225 = fmul float %173, %211 %226 = fadd float %224, %225 %227 = insertelement <4 x float> undef, float %221, i32 0 %228 = insertelement <4 x float> %227, float %216, i32 1 %229 = insertelement <4 x float> %228, float %226, i32 2 %230 = insertelement <4 x float> %229, float %151, i32 3 %231 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %230) %232 = extractelement <4 x float> %231, i32 0 %233 = extractelement <4 x float> %231, i32 1 %234 = extractelement <4 x float> %231, i32 2 %235 = extractelement <4 x float> %231, i32 3 %236 = call float @fabs(float %234) %237 = fdiv float 1.000000e+00, %236 %238 = fmul float %232, %237 %239 = fadd float %238, 1.500000e+00 %240 = fmul float %233, %237 %241 = fadd float %240, 1.500000e+00 %242 = bitcast float %241 to i32 %243 = bitcast float %239 to i32 %244 = bitcast float %235 to i32 %245 = insertelement <4 x i32> undef, i32 %242, i32 0 %246 = insertelement <4 x i32> %245, i32 %243, i32 1 %247 = insertelement <4 x i32> %246, i32 %244, i32 2 %248 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %247, <32 x i8> %81, <16 x i8> %84, i32 4) %249 = extractelement <4 x float> %248, i32 0 %250 = extractelement <4 x float> %248, i32 1 %251 = extractelement <4 x float> %248, i32 2 %252 = fmul float %35, %38 %253 = fmul float %36, %38 %254 = fmul float %37, %38 %255 = fmul float %249, %252 %256 = fmul float %250, %253 %257 = fmul float %251, %254 %258 = fmul float %39, %42 %259 = fmul float %40, %42 %260 = fmul float %41, %42 %261 = bitcast float %100 to i32 %262 = bitcast float %101 to i32 %263 = insertelement <2 x i32> undef, i32 %261, i32 0 %264 = insertelement <2 x i32> %263, i32 %262, i32 1 %265 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %264, <32 x i8> %63, <16 x i8> %66, i32 2) %266 = extractelement <4 x float> %265, i32 0 %267 = extractelement <4 x float> %265, i32 1 %268 = extractelement <4 x float> %265, i32 2 %269 = fmul float %258, %266 %270 = fmul float %259, %267 %271 = fmul float %260, %268 %272 = bitcast float %100 to i32 %273 = bitcast float %101 to i32 %274 = insertelement <2 x i32> undef, i32 %272, i32 0 %275 = insertelement <2 x i32> %274, i32 %273, i32 1 %276 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %275, <32 x i8> %58, <16 x i8> %60, i32 2) %277 = extractelement <4 x float> %276, i32 0 %278 = fmul float %277, %255 %279 = fadd float %278, %269 %280 = fmul float %277, %256 %281 = fadd float %280, %270 %282 = fmul float %277, %257 %283 = fadd float %282, %271 %284 = fsub float 1.000000e+00, %24 %285 = fsub float 1.000000e+00, %25 %286 = fsub float 1.000000e+00, %26 %287 = fmul float %279, %284 %288 = fmul float %281, %285 %289 = fmul float %283, %286 %290 = fmul float %287, %30 %291 = fadd float %290, %27 %292 = fmul float %288, %30 %293 = fadd float %292, %28 %294 = fmul float %289, %30 %295 = fadd float %294, %29 %296 = fmul float %102, %102 %297 = fmul float %103, %103 %298 = fadd float %297, %296 %299 = fmul float %104, %104 %300 = fadd float %298, %299 %301 = call float @llvm.AMDGPU.rsq.clamped.f32(float %300) %302 = call float @llvm.minnum.f32(float %301, float 0x47EFFFFFE0000000) %303 = fmul float %102, %302 %304 = fmul float %103, %302 %305 = fmul float %104, %302 %306 = call float @llvm.AMDIL.clamp.(float %305, float 0.000000e+00, float 1.000000e+00) %307 = fmul float %207, %303 %308 = fmul float %209, %304 %309 = fadd float %308, %307 %310 = fmul float %211, %305 %311 = fadd float %309, %310 %312 = call float @llvm.AMDIL.clamp.(float %311, float 0.000000e+00, float 1.000000e+00) %313 = fmul float %306, %291 %314 = fmul float %306, %293 %315 = fmul float %306, %295 %316 = fadd float %306, 0xBEB0C6F7A0000000 %317 = call float @llvm.AMDGPU.cndlt(float %316, float 0.000000e+00, float %313) %318 = call float @llvm.AMDGPU.cndlt(float %316, float 0.000000e+00, float %314) %319 = call float @llvm.AMDGPU.cndlt(float %316, float 0.000000e+00, float %315) %320 = call float @fabs(float %312) %321 = call float @llvm.pow.f32(float %320, float 1.500000e+01) %322 = fadd float %312, 0xBEB0C6F7A0000000 %323 = fmul float %321, 0x3FED48D5A0000000 %324 = bitcast float %100 to i32 %325 = bitcast float %101 to i32 %326 = insertelement <2 x i32> undef, i32 %324, i32 0 %327 = insertelement <2 x i32> %326, i32 %325, i32 1 %328 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %327, <32 x i8> %69, <16 x i8> %72, i32 2) %329 = extractelement <4 x float> %328, i32 0 %330 = extractelement <4 x float> %328, i32 1 %331 = extractelement <4 x float> %328, i32 2 %332 = fmul float %329, %34 %333 = fadd float %332, %31 %334 = fmul float %330, %34 %335 = fadd float %334, %32 %336 = fmul float %331, %34 %337 = fadd float %336, %33 %338 = fmul float %323, %333 %339 = fmul float %323, %335 %340 = fmul float %323, %337 %341 = call float @llvm.AMDGPU.cndlt(float %322, float 0.000000e+00, float %338) %342 = call float @llvm.AMDGPU.cndlt(float %322, float 0.000000e+00, float %339) %343 = call float @llvm.AMDGPU.cndlt(float %322, float 0.000000e+00, float %340) %344 = fadd float %317, %341 %345 = fadd float %318, %342 %346 = fadd float %319, %343 %347 = fmul float %163, %344 %348 = fmul float %163, %345 %349 = fmul float %163, %346 %350 = fmul float %145, %347 %351 = fmul float %145, %348 %352 = fmul float %145, %349 %353 = fmul float %350, %45 %354 = fmul float %351, %46 %355 = fmul float %352, %47 %356 = fmul float %132, %353 %357 = fmul float %132, %354 %358 = fmul float %132, %355 %359 = bitcast float %100 to i32 %360 = bitcast float %101 to i32 %361 = insertelement <2 x i32> undef, i32 %359, i32 0 %362 = insertelement <2 x i32> %361, i32 %360, i32 1 %363 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %362, <32 x i8> %75, <16 x i8> %78, i32 2) %364 = extractelement <4 x float> %363, i32 0 %365 = call float @llvm.maxnum.f32(float %364, float %44) %366 = call float @llvm.minnum.f32(float %43, float %365) %367 = call float @llvm.AMDIL.clamp.(float %366, float 0.000000e+00, float 1.000000e+00) %368 = fsub float -0.000000e+00, %151 %369 = call float @llvm.AMDGPU.cndlt(float %368, float %356, float 0.000000e+00) %370 = fsub float -0.000000e+00, %151 %371 = call float @llvm.AMDGPU.cndlt(float %370, float %357, float 0.000000e+00) %372 = fsub float -0.000000e+00, %151 %373 = call float @llvm.AMDGPU.cndlt(float %372, float %358, float 0.000000e+00) %374 = fsub float -0.000000e+00, %151 %375 = call float @llvm.AMDGPU.cndlt(float %374, float %367, float 0.000000e+00) %376 = call i32 @llvm.SI.packf16(float %369, float %371) %377 = bitcast i32 %376 to float %378 = call i32 @llvm.SI.packf16(float %373, float %375) %379 = bitcast i32 %378 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %377, float %379, float %377, float %379) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 0, 4, [m0] ; C8341000 v_interp_p2_f32 v13, [v13], v1, 0, 4, [m0] ; C8351001 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v15, v0, 2, 4, [m0] ; C83C1200 v_interp_p2_f32 v15, [v15], v1, 2, 4, [m0] ; C83D1201 v_interp_p1_f32 v16, v0, 0, 5, [m0] ; C8401400 v_interp_p2_f32 v16, [v16], v1, 0, 5, [m0] ; C8411401 v_interp_p1_f32 v17, v0, 1, 5, [m0] ; C8441500 v_interp_p2_f32 v17, [v17], v1, 1, 5, [m0] ; C8451501 v_interp_p1_f32 v18, v0, 2, 5, [m0] ; C8481600 v_interp_p2_f32 v18, [v18], v1, 2, 5, [m0] ; C8491601 v_interp_p1_f32 v19, v0, 0, 6, [m0] ; C84C1800 v_interp_p2_f32 v19, [v19], v1, 0, 6, [m0] ; C84D1801 v_interp_p1_f32 v20, v0, 1, 6, [m0] ; C8501900 v_interp_p2_f32 v20, [v20], v1, 1, 6, [m0] ; C8511901 v_interp_p1_f32 v0, v0, 2, 6, [m0] ; C8001A00 s_load_dwordx4 s[12:15], s[4:5], 0x14 ; C0860514 s_load_dwordx8 s[16:23], s[6:7], 0x28 ; C0C80728 v_interp_p2_f32 v0, [v0], v1, 2, 6, [m0] ; C8011A01 v_mul_f32_e32 v1, v16, v16 ; 10022110 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C v_mad_f32 v1, v17, v17, v1 ; D2820001 04062311 s_buffer_load_dword s25, s[0:3], 0x2d ; C20C812D v_mad_f32 v1, v18, v18, v1 ; D2820001 04062512 v_rsq_clamp_f32_e64 v21, |v1| ; D3580115 00000101 s_buffer_load_dword s26, s[0:3], 0x28 ; C20D0128 s_buffer_load_dword s27, s[0:3], 0x29 ; C20D8129 image_sample v9, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[16:23], s[12:15] ; F0800100 00640909 v_min_f32_e32 v10, 0x7f7fffff, v21 ; 1E142AFF 7F7FFFFF v_mul_f32_e32 v16, v16, v10 ; 10201510 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v16, s24, v16 ; 10202018 v_mul_f32_e32 v17, v17, v10 ; 10221511 v_mad_f32 v16, -s25, v17, -v16 ; D2820010 A4422219 s_buffer_load_dword s12, s[0:3], 0x2a ; C206012A s_buffer_load_dword s13, s[0:3], 0x2e ; C206812E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v9, s26, v9 ; 0612121A v_mul_f32_e32 v9, s27, v9 ; 1012121B v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_mov_b32_e32 v17, 0x7fffffff ; 7E2202FF 7FFFFFFF v_and_b32_e32 v21, v9, v17 ; 362A2309 v_log_f32_e32 v21, v21 ; 7E2A4F15 s_buffer_load_dword s14, s[0:3], 0x30 ; C2070130 s_buffer_load_dword s15, s[0:3], 0x31 ; C2078131 v_mul_f32_e32 v10, v18, v10 ; 10141512 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, -s13, v10, v16 ; D282000A 2442140D v_mul_legacy_f32_e32 v16, s12, v21 ; 0E202A0C v_mov_b32_e32 v18, 0xb58637bd ; 7E2402FF B58637BD v_add_f32_e32 v9, v18, v9 ; 06121312 v_exp_f32_e32 v16, v16 ; 7E204B10 v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v9, v16, 0, vcc ; D2000009 01A90110 v_subrev_f32_e32 v10, s14, v10 ; 0A14140E v_mul_f32_e32 v10, s15, v10 ; 1014140F v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_mul_f32_e32 v10, v10, v10 ; 1014150A v_mul_f32_e32 v16, v1, v9 ; 10201301 v_mul_f32_e32 v24, v16, v10 ; 10301510 v_cmp_gt_f32_e32 vcc, 0, v24 ; 7C083080 v_cndmask_b32_e64 v16, 0, -1.0, vcc ; D2000010 01A9E680 v_cndmask_b32_e64 v16, v16, -1.0, vcc ; D2000010 01A9E710 v_cndmask_b32_e64 v16, v16, -1.0, vcc ; D2000010 01A9E710 v_cndmask_b32_e64 v16, v16, -1.0, vcc ; D2000010 01A9E710 v_max_f32_e32 v1, 0, v1 ; 20020280 v_cmpx_le_f32_e32 vcc, 0, v16 ; 7C262080 v_mul_f32_e32 v16, v5, v5 ; 10200B05 v_mad_f32 v16, v6, v6, v16 ; D2820010 04420D06 v_mad_f32 v16, v7, v7, v16 ; D2820010 04420F07 v_rsq_clamp_f32_e32 v16, v16 ; 7E205910 v_min_f32_e32 v16, 0x7f7fffff, v16 ; 1E2020FF 7F7FFFFF v_mul_f32_e32 v21, v2, v2 ; 102A0502 v_mad_f32 v21, v3, v3, v21 ; D2820015 04560703 v_mad_f32 v21, v4, v4, v21 ; D2820015 04560904 v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v5, v16, v5 ; 100A0B10 v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v7, v16, v7 ; 100E0F10 v_min_f32_e32 v16, 0x7f7fffff, v21 ; 1E202AFF 7F7FFFFF v_mul_f32_e32 v2, v16, v2 ; 10040510 v_mul_f32_e32 v3, v16, v3 ; 10060710 v_mul_f32_e32 v4, v16, v4 ; 10080910 v_mul_f32_e32 v16, v3, v7 ; 10200F03 v_mad_f32 v16, v6, v4, -v16 ; D2820010 84420906 v_mul_f32_e32 v21, v4, v5 ; 102A0B04 v_mad_f32 v21, v7, v2, -v21 ; D2820015 84560507 v_mul_f32_e32 v22, v2, v6 ; 102C0D02 v_mad_f32 v22, v5, v3, -v22 ; D2820016 845A0705 v_mul_f32_e32 v23, v19, v19 ; 102E2713 v_mad_f32 v23, v20, v20, v23 ; D2820017 045E2914 v_mad_f32 v23, v0, v0, v23 ; D2820017 045E0100 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v16, v8, v16 ; 10202108 v_mul_f32_e32 v21, v8, v21 ; 102A2B08 v_mul_f32_e32 v8, v8, v22 ; 10102D08 v_min_f32_e32 v22, 0x7f7fffff, v23 ; 1E2C2EFF 7F7FFFFF v_mul_f32_e32 v23, v22, v0 ; 102E0116 v_mad_f32 v25, v0, v22, v23 ; D2820019 045E2D00 v_mad_f32 v0, -v0, v22, v25 ; D2820000 24662D00 v_mul_f32_e32 v19, v22, v19 ; 10262716 v_mul_f32_e32 v20, v22, v20 ; 10282916 v_mad_f32 v19, 0, v23, -v19 ; D2820013 844E2E80 v_mad_f32 v20, 0, v23, -v20 ; D2820014 84522E80 v_mul_f32_e32 v16, v19, v16 ; 10202113 v_mad_f32 v16, v21, v20, v16 ; D2820010 04422915 v_mad_f32 v22, v8, v0, v16 ; D2820016 04420108 v_mul_f32_e32 v2, v19, v2 ; 10040513 v_mad_f32 v2, v3, v20, v2 ; D2820002 040A2903 v_mad_f32 v21, v4, v0, v2 ; D2820015 040A0104 v_mul_f32_e32 v2, v19, v5 ; 10040B13 v_mad_f32 v2, v6, v20, v2 ; D2820002 040A2906 v_mad_f32 v23, v7, v0, v2 ; D2820017 040A0107 s_load_dwordx4 s[12:15], s[4:5], 0x10 ; C0860510 s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720 v_cubeid_f32 v28, v21, v22, v23 ; D288001C 045E2D15 v_cubema_f32 v27, v21, v22, v23 ; D28E001B 045E2D15 v_cubesc_f32 v26, v21, v22, v23 ; D28A001A 045E2D15 v_cubetc_f32 v25, v21, v22, v23 ; D28C0019 045E2D15 v_rcp_f32_e64 v2, |v27| ; D3540102 0000011B v_mov_b32_e32 v3, 0x3fc00000 ; 7E0602FF 3FC00000 v_mad_f32 v27, v25, v2, v3 ; D282001B 040E0519 v_mad_f32 v26, v26, v2, v3 ; D282001A 040E051A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[16:23], s[12:15] ; F0800700 0064021A s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_load_dwordx8 s[48:55], s[6:7], 0x0 ; C0D80700 s_buffer_load_dword s56, s[0:3], 0x14 ; C21C0114 s_buffer_load_dword s57, s[0:3], 0x15 ; C21C8115 s_buffer_load_dword s58, s[0:3], 0x16 ; C21D0116 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx4 s[32:35], s[4:5], 0x8 ; C0900508 s_buffer_load_dword s59, s[0:3], 0x17 ; C21D8117 s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C s_load_dwordx8 s[16:23], s[6:7], 0x18 ; C0C80718 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 image_sample v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[40:47], s[36:39] ; F0800700 012A050B image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[48:55], s[8:11] ; F0800100 004C080B s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_buffer_load_dword s6, s[0:3], 0x19 ; C2030119 s_buffer_load_dword s7, s[0:3], 0x1a ; C203811A v_mov_b32_e32 v16, s56 ; 7E200238 image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[24:31], s[32:35] ; F0800700 0106150B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v16, s59, v21, v16 ; D2820010 04422A3B v_mov_b32_e32 v21, s57 ; 7E2A0239 v_mad_f32 v21, s59, v22, v21 ; D2820015 04562C3B v_mov_b32_e32 v22, s58 ; 7E2C023A v_mad_f32 v22, s59, v23, v22 ; D2820016 045A2E3B s_buffer_load_dword s8, s[0:3], 0x1f ; C204011F s_buffer_load_dword s9, s[0:3], 0x1c ; C204811C s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D s_buffer_load_dword s11, s[0:3], 0x1e ; C205811E v_mov_b32_e32 v23, s4 ; 7E2E0204 v_mul_f32_e32 v23, s5, v23 ; 102E2E05 v_mov_b32_e32 v25, s4 ; 7E320204 v_mul_f32_e32 v25, s6, v25 ; 10323206 v_mov_b32_e32 v26, s4 ; 7E340204 v_mul_f32_e32 v26, s7, v26 ; 10343407 v_mul_f32_e32 v2, v23, v2 ; 10040517 v_mul_f32_e32 v3, v25, v3 ; 10060719 v_mul_f32_e32 v4, v26, v4 ; 1008091A s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v23, s8 ; 7E2E0208 v_mul_f32_e32 v23, s9, v23 ; 102E2E09 v_mov_b32_e32 v25, s8 ; 7E320208 v_mul_f32_e32 v25, s10, v25 ; 1032320A v_mov_b32_e32 v26, s8 ; 7E340208 v_mul_f32_e32 v26, s11, v26 ; 1034340B v_mul_f32_e32 v5, v5, v23 ; 100A2F05 v_mul_f32_e32 v6, v6, v25 ; 100C3306 v_mul_f32_e32 v7, v7, v26 ; 100E3507 v_xor_b32_e32 v23, 0x80000000, v24 ; 3A2E30FF 80000000 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800100 00640B0B v_mad_f32 v2, v8, v2, v5 ; D2820002 04160508 v_mad_f32 v3, v8, v3, v6 ; D2820003 041A0708 v_mul_f32_e32 v5, v13, v13 ; 100A1B0D v_mad_f32 v5, v14, v14, v5 ; D2820005 04161D0E v_mad_f32 v5, v15, v15, v5 ; D2820005 04161F0F v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 s_buffer_load_dword s5, s[0:3], 0x12 ; C2028112 s_buffer_load_dword s6, s[0:3], 0x13 ; C2030113 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s9, s[0:3], 0x2 ; C2048102 s_buffer_load_dword s10, s[0:3], 0x10 ; C2050110 s_buffer_load_dword s11, s[0:3], 0x11 ; C2058111 s_buffer_load_dword s12, s[0:3], 0x21 ; C2060121 s_buffer_load_dword s13, s[0:3], 0x24 ; C2068124 s_buffer_load_dword s14, s[0:3], 0x25 ; C2070125 s_buffer_load_dword s15, s[0:3], 0x26 ; C2078126 s_buffer_load_dword s0, s[0:3], 0x27 ; C2000127 v_mad_f32 v4, v8, v4, v7 ; D2820004 041E0908 v_min_f32_e32 v5, 0x7f7fffff, v5 ; 1E0A0AFF 7F7FFFFF v_mul_f32_e32 v6, v5, v13 ; 100C1B05 v_mul_f32_e32 v6, v6, v19 ; 100C2706 v_mul_f32_e32 v7, v5, v14 ; 100E1D05 v_mad_f32 v6, v20, v7, v6 ; D2820006 041A0F14 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e64 v7, 1.0, s7 ; D2080007 00000EF2 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_sub_f32_e64 v7, 1.0, s8 ; D2080007 000010F2 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_sub_f32_e64 v7, 1.0, s9 ; D2080007 000012F2 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mov_b32_e32 v7, s10 ; 7E0E020A v_mad_f32 v2, s6, v2, v7 ; D2820002 041E0406 v_mov_b32_e32 v7, s11 ; 7E0E020B v_mad_f32 v3, s6, v3, v7 ; D2820003 041E0606 v_mov_b32_e32 v7, s5 ; 7E0E0205 v_mad_f32 v4, s6, v4, v7 ; D2820004 041E0806 v_mul_f32_e32 v5, v5, v15 ; 100A1F05 v_mad_f32 v0, v0, v5, v6 ; D2820000 041A0B00 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_add_f32_e32 v5, v18, v5 ; 060A0B12 v_and_b32_e32 v6, v0, v17 ; 360C2300 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_cndmask_b32_e64 v4, v4, 0, vcc ; D2000004 01A90104 v_mul_legacy_f32_e32 v5, 0x41700000, v6 ; 0E0A0CFF 41700000 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mul_f32_e32 v5, 0x3f6a46ad, v5 ; 100A0AFF 3F6A46AD v_mul_f32_e32 v6, v16, v5 ; 100C0B10 v_mul_f32_e32 v7, v21, v5 ; 100E0B15 v_mul_f32_e32 v5, v22, v5 ; 100A0B16 v_add_f32_e32 v0, v18, v0 ; 06000112 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v6, 0, vcc ; D2000000 01A90106 v_cndmask_b32_e64 v6, v7, 0, vcc ; D2000006 01A90107 v_cndmask_b32_e64 v5, v5, 0, vcc ; D2000005 01A90105 v_add_f32_e32 v7, v18, v1 ; 060E0312 v_and_b32_e32 v1, v1, v17 ; 36022301 v_add_f32_e32 v0, v0, v2 ; 06000500 v_log_f32_e32 v1, v1 ; 7E024F01 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_add_f32_e32 v2, v6, v3 ; 06040706 v_add_f32_e32 v3, v5, v4 ; 06060905 v_mul_legacy_f32_e32 v1, s0, v1 ; 0E020200 v_exp_f32_e32 v1, v1 ; 7E024B01 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mul_f32_e32 v2, v2, v1 ; 10040302 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mul_f32_e32 v2, v2, v9 ; 10041302 v_mul_f32_e32 v1, v1, v9 ; 10021301 v_mul_f32_e32 v0, s13, v0 ; 1000000D v_mul_f32_e32 v2, s14, v2 ; 1004040E v_mul_f32_e32 v1, s15, v1 ; 1002020F v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mul_f32_e32 v2, v2, v10 ; 10041502 v_mul_f32_e32 v1, v1, v10 ; 10021501 v_cmp_gt_f32_e32 vcc, 0, v23 ; 7C082E80 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v2, 0, v2, vcc ; D2000002 01AA0480 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_max_f32_e32 v2, s12, v11 ; 2004160C v_min_f32_e32 v2, s4, v2 ; 1E040404 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cndmask_b32_e64 v2, 0, v2, vcc ; D2000002 01AA0480 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 32 Code Size: 1552 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..9] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..5] IMM[0] FLT32 { 2.0000, -1.0000, -0.3333, 1.0000} IMM[1] FLT32 { 0.8165, 0.5774, 0.0000, 0.0000} IMM[2] FLT32 { 0.0039, 0.0000, 340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000} IMM[3] FLT32 { -0.7071, -0.4082, 0.5774, 0.7071} 0: TEX TEMP[0], IN[1], SAMP[5], 2D 1: ADD TEMP[0], TEMP[0].xxxx, IMM[0].zzzz 2: KILL_IF TEMP[0] 3: DP3 TEMP[1].x, IN[3], IN[3] 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MIN TEMP[1].x, IMM[2].zzzz, TEMP[1].xxxx 6: MUL TEMP[0].xyz, IN[3], TEMP[1].xxxx 7: TEX TEMP[2], IN[1], SAMP[2], 2D 8: MAD TEMP[2].xyz, TEMP[2], IMM[0].xxxx, IMM[0].yyyy 9: DP3 TEMP[1].x, TEMP[2], TEMP[2] 10: RSQ TEMP[1].x, TEMP[1].xxxx 11: MIN TEMP[1].x, IMM[2].zzzz, TEMP[1].xxxx 12: MUL TEMP[3].xyz, TEMP[2], TEMP[1].xxxx 13: DP3 TEMP[0].w, TEMP[3], TEMP[0] 14: MUL TEMP[2].xyz, TEMP[0].wwww, TEMP[3] 15: MAD TEMP[0].xyz, TEMP[2], IMM[0].xxxx, -TEMP[0] 16: DP2 TEMP[1].x, TEMP[0].yzzw, IMM[1] 17: ADD_SAT TEMP[2].x, IMM[1].zzzz, TEMP[1].xxxx 18: DP3_SAT TEMP[2].y, TEMP[0], IMM[3] 19: DP3_SAT TEMP[2].z, TEMP[0].yzxw, IMM[3].yzww 20: MAX TEMP[0].xyz, TEMP[2], IMM[1].wwww 21: LG2 TEMP[1].x, |TEMP[0].xxxx| 22: MAX TEMP[2].x, IMM[2].wwww, TEMP[1].xxxx 23: LG2 TEMP[1].x, |TEMP[0].yyyy| 24: MAX TEMP[2].y, IMM[2].wwww, TEMP[1].xxxx 25: LG2 TEMP[1].x, |TEMP[0].zzzz| 26: MAX TEMP[2].z, IMM[2].wwww, TEMP[1].xxxx 27: MOV TEMP[0].w, IMM[0].wwww 28: ADD TEMP[0].x, TEMP[0].wwww, CONST[8].xxxx 29: MUL TEMP[0].xyz, TEMP[2], TEMP[0].xxxx 30: EX2 TEMP[2].x, TEMP[0].xxxx 31: EX2 TEMP[2].y, TEMP[0].yyyy 32: EX2 TEMP[2].z, TEMP[0].zzzz 33: TEX TEMP[4], IN[0], SAMP[1], 2D 34: MUL TEMP[0].xyz, TEMP[4], CONST[7] 35: DP3 TEMP[2].x, TEMP[0], TEMP[2] 36: TEX TEMP[4], IN[1], SAMP[4], 2D 37: MAD TEMP[2].yzw, TEMP[4].xxyz, CONST[5].wwww, CONST[5].xxyz 38: TEX TEMP[4], IN[0], SAMP[0], 2D 39: MUL TEMP[4].xyz, TEMP[4], CONST[6] 40: MUL TEMP[2].yzw, TEMP[2], TEMP[4].xxyz 41: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[2].yzww 42: DP2 TEMP[1].x, TEMP[3].yzzw, IMM[1] 43: ADD_SAT TEMP[5].x, IMM[1].zzzz, TEMP[1].xxxx 44: DP3_SAT TEMP[5].y, TEMP[3], IMM[3] 45: DP3_SAT TEMP[5].z, TEMP[3].yzxw, IMM[3].yzww 46: MUL TEMP[3].xyz, TEMP[5], TEMP[5] 47: MAX TEMP[5].xyz, TEMP[3], IMM[1].wwww 48: DP3 TEMP[0].x, TEMP[0], TEMP[5] 49: ADD TEMP[0].yzw, TEMP[0].wwww, -CONST[0].xxyz 50: TEX TEMP[3], IN[1], SAMP[3], 2D 51: MUL TEMP[0].yzw, TEMP[0], TEMP[3].xxyz 52: MAD TEMP[0].yzw, TEMP[0], CONST[4].wwww, CONST[4].xxyz 53: MUL TEMP[3].xyz, TEMP[4], TEMP[0].yzww 54: MAD TEMP[2].xyz, TEMP[3], TEMP[0].xxxx, TEMP[2] 55: ADD TEMP[2].xyz, TEMP[2], CONST[0] 56: MAD OUT[0].xyz, TEMP[0].yzww, CONST[9], TEMP[2] 57: MUL OUT[0].w, IMM[2].xxxx, IN[2].wwww 58: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %45 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %46 = load <32 x i8>, <32 x i8> addrspace(2)* %45, align 32, !tbaa !0 %47 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %50 = bitcast <8 x i32> addrspace(2)* %49 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %53 = bitcast <4 x i32> addrspace(2)* %52 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %74 = bitcast <8 x i32> addrspace(2)* %73 to <32 x i8> addrspace(2)* %75 = load <32 x i8>, <32 x i8> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %77 = bitcast <4 x i32> addrspace(2)* %76 to <16 x i8> addrspace(2)* %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %87 = bitcast float %81 to i32 %88 = bitcast float %82 to i32 %89 = insertelement <2 x i32> undef, i32 %87, i32 0 %90 = insertelement <2 x i32> %89, i32 %88, i32 1 %91 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %75, <16 x i8> %78, i32 2) %92 = extractelement <4 x float> %91, i32 0 %93 = fadd float %92, 0xBFD554C980000000 %94 = fadd float %92, 0xBFD554C980000000 %95 = fadd float %92, 0xBFD554C980000000 %96 = fadd float %92, 0xBFD554C980000000 %97 = fcmp olt float %93, 0.000000e+00 %98 = fcmp olt float %94, 0.000000e+00 %99 = fcmp olt float %95, 0.000000e+00 %100 = fcmp olt float %96, 0.000000e+00 %101 = or i1 %100, %99 %102 = or i1 %101, %98 %103 = or i1 %102, %97 %104 = select i1 %103, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %104) %105 = fmul float %84, %84 %106 = fmul float %85, %85 %107 = fadd float %106, %105 %108 = fmul float %86, %86 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) %111 = call float @llvm.minnum.f32(float %110, float 0x47EFFFFFE0000000) %112 = fmul float %84, %111 %113 = fmul float %85, %111 %114 = fmul float %86, %111 %115 = bitcast float %81 to i32 %116 = bitcast float %82 to i32 %117 = insertelement <2 x i32> undef, i32 %115, i32 0 %118 = insertelement <2 x i32> %117, i32 %116, i32 1 %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %57, <16 x i8> %60, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = fmul float %120, 2.000000e+00 %124 = fadd float %123, -1.000000e+00 %125 = fmul float %121, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %122, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %124, %124 %130 = fmul float %126, %126 %131 = fadd float %130, %129 %132 = fmul float %128, %128 %133 = fadd float %131, %132 %134 = call float @llvm.AMDGPU.rsq.clamped.f32(float %133) %135 = call float @llvm.minnum.f32(float %134, float 0x47EFFFFFE0000000) %136 = fmul float %124, %135 %137 = fmul float %126, %135 %138 = fmul float %128, %135 %139 = fmul float %136, %112 %140 = fmul float %137, %113 %141 = fadd float %140, %139 %142 = fmul float %138, %114 %143 = fadd float %141, %142 %144 = fmul float %143, %136 %145 = fmul float %143, %137 %146 = fmul float %143, %138 %147 = fmul float %144, 2.000000e+00 %148 = fsub float %147, %112 %149 = fmul float %145, 2.000000e+00 %150 = fsub float %149, %113 %151 = fmul float %146, 2.000000e+00 %152 = fsub float %151, %114 %153 = fmul float %150, 0x3FEA20BD80000000 %154 = fmul float %152, 0x3FE279A740000000 %155 = fadd float %153, %154 %156 = fadd float %155, 0.000000e+00 %157 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00) %158 = fmul float %148, 0xBFE6A09E60000000 %159 = fmul float %150, 0xBFDA20BD80000000 %160 = fadd float %159, %158 %161 = fmul float %152, 0x3FE279A740000000 %162 = fadd float %160, %161 %163 = call float @llvm.AMDIL.clamp.(float %162, float 0.000000e+00, float 1.000000e+00) %164 = fmul float %150, 0xBFDA20BD80000000 %165 = fmul float %152, 0x3FE279A740000000 %166 = fadd float %165, %164 %167 = fmul float %148, 0x3FE6A09E60000000 %168 = fadd float %166, %167 %169 = call float @llvm.AMDIL.clamp.(float %168, float 0.000000e+00, float 1.000000e+00) %170 = call float @llvm.maxnum.f32(float %157, float 0x3EB0C6F7A0000000) %171 = call float @llvm.maxnum.f32(float %163, float 0x3EB0C6F7A0000000) %172 = call float @llvm.maxnum.f32(float %169, float 0x3EB0C6F7A0000000) %173 = call float @fabs(float %170) %174 = call float @llvm.log2.f32(float %173) %175 = call float @llvm.maxnum.f32(float %174, float 0xC7EFFFFFE0000000) %176 = call float @fabs(float %171) %177 = call float @llvm.log2.f32(float %176) %178 = call float @llvm.maxnum.f32(float %177, float 0xC7EFFFFFE0000000) %179 = call float @fabs(float %172) %180 = call float @llvm.log2.f32(float %179) %181 = call float @llvm.maxnum.f32(float %180, float 0xC7EFFFFFE0000000) %182 = fadd float %41, 1.000000e+00 %183 = fmul float %175, %182 %184 = fmul float %178, %182 %185 = fmul float %181, %182 %186 = call float @llvm.AMDIL.exp.(float %183) %187 = call float @llvm.AMDIL.exp.(float %184) %188 = call float @llvm.AMDIL.exp.(float %185) %189 = bitcast float %79 to i32 %190 = bitcast float %80 to i32 %191 = insertelement <2 x i32> undef, i32 %189, i32 0 %192 = insertelement <2 x i32> %191, i32 %190, i32 1 %193 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %192, <32 x i8> %51, <16 x i8> %54, i32 2) %194 = extractelement <4 x float> %193, i32 0 %195 = extractelement <4 x float> %193, i32 1 %196 = extractelement <4 x float> %193, i32 2 %197 = fmul float %194, %38 %198 = fmul float %195, %39 %199 = fmul float %196, %40 %200 = fmul float %197, %186 %201 = fmul float %198, %187 %202 = fadd float %201, %200 %203 = fmul float %199, %188 %204 = fadd float %202, %203 %205 = bitcast float %81 to i32 %206 = bitcast float %82 to i32 %207 = insertelement <2 x i32> undef, i32 %205, i32 0 %208 = insertelement <2 x i32> %207, i32 %206, i32 1 %209 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %208, <32 x i8> %69, <16 x i8> %72, i32 2) %210 = extractelement <4 x float> %209, i32 0 %211 = extractelement <4 x float> %209, i32 1 %212 = extractelement <4 x float> %209, i32 2 %213 = fmul float %210, %34 %214 = fadd float %213, %31 %215 = fmul float %211, %34 %216 = fadd float %215, %32 %217 = fmul float %212, %34 %218 = fadd float %217, %33 %219 = bitcast float %79 to i32 %220 = bitcast float %80 to i32 %221 = insertelement <2 x i32> undef, i32 %219, i32 0 %222 = insertelement <2 x i32> %221, i32 %220, i32 1 %223 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %222, <32 x i8> %46, <16 x i8> %48, i32 2) %224 = extractelement <4 x float> %223, i32 0 %225 = extractelement <4 x float> %223, i32 1 %226 = extractelement <4 x float> %223, i32 2 %227 = fmul float %224, %35 %228 = fmul float %225, %36 %229 = fmul float %226, %37 %230 = fmul float %214, %227 %231 = fmul float %216, %228 %232 = fmul float %218, %229 %233 = fmul float %204, %230 %234 = fmul float %204, %231 %235 = fmul float %204, %232 %236 = fmul float %137, 0x3FEA20BD80000000 %237 = fmul float %138, 0x3FE279A740000000 %238 = fadd float %236, %237 %239 = fadd float %238, 0.000000e+00 %240 = call float @llvm.AMDIL.clamp.(float %239, float 0.000000e+00, float 1.000000e+00) %241 = fmul float %136, 0xBFE6A09E60000000 %242 = fmul float %137, 0xBFDA20BD80000000 %243 = fadd float %242, %241 %244 = fmul float %138, 0x3FE279A740000000 %245 = fadd float %243, %244 %246 = call float @llvm.AMDIL.clamp.(float %245, float 0.000000e+00, float 1.000000e+00) %247 = fmul float %137, 0xBFDA20BD80000000 %248 = fmul float %138, 0x3FE279A740000000 %249 = fadd float %248, %247 %250 = fmul float %136, 0x3FE6A09E60000000 %251 = fadd float %249, %250 %252 = call float @llvm.AMDIL.clamp.(float %251, float 0.000000e+00, float 1.000000e+00) %253 = fmul float %240, %240 %254 = fmul float %246, %246 %255 = fmul float %252, %252 %256 = call float @llvm.maxnum.f32(float %253, float 0x3EB0C6F7A0000000) %257 = call float @llvm.maxnum.f32(float %254, float 0x3EB0C6F7A0000000) %258 = call float @llvm.maxnum.f32(float %255, float 0x3EB0C6F7A0000000) %259 = fmul float %197, %256 %260 = fmul float %198, %257 %261 = fadd float %260, %259 %262 = fmul float %199, %258 %263 = fadd float %261, %262 %264 = fsub float 1.000000e+00, %24 %265 = fsub float 1.000000e+00, %25 %266 = fsub float 1.000000e+00, %26 %267 = bitcast float %81 to i32 %268 = bitcast float %82 to i32 %269 = insertelement <2 x i32> undef, i32 %267, i32 0 %270 = insertelement <2 x i32> %269, i32 %268, i32 1 %271 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %270, <32 x i8> %63, <16 x i8> %66, i32 2) %272 = extractelement <4 x float> %271, i32 0 %273 = extractelement <4 x float> %271, i32 1 %274 = extractelement <4 x float> %271, i32 2 %275 = fmul float %264, %272 %276 = fmul float %265, %273 %277 = fmul float %266, %274 %278 = fmul float %275, %30 %279 = fadd float %278, %27 %280 = fmul float %276, %30 %281 = fadd float %280, %28 %282 = fmul float %277, %30 %283 = fadd float %282, %29 %284 = fmul float %227, %279 %285 = fmul float %228, %281 %286 = fmul float %229, %283 %287 = fmul float %284, %263 %288 = fadd float %287, %233 %289 = fmul float %285, %263 %290 = fadd float %289, %234 %291 = fmul float %286, %263 %292 = fadd float %291, %235 %293 = fadd float %288, %24 %294 = fadd float %290, %25 %295 = fadd float %292, %26 %296 = fmul float %279, %42 %297 = fadd float %296, %293 %298 = fmul float %281, %43 %299 = fadd float %298, %294 %300 = fmul float %283, %44 %301 = fadd float %300, %295 %302 = fmul float %83, 3.906250e-03 %303 = call i32 @llvm.SI.packf16(float %297, float %299) %304 = bitcast i32 %303 to float %305 = call i32 @llvm.SI.packf16(float %301, float %302) %306 = bitcast i32 %305 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %304, float %306, float %304, float %306) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 s_load_dwordx4 s[8:11], s[4:5], 0x14 ; C0840514 s_load_dwordx8 s[12:19], s[6:7], 0x28 ; C0C60728 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 3, 2, [m0] ; C8180B00 v_interp_p2_f32 v6, [v6], v1, 3, 2, [m0] ; C8190B01 v_interp_p1_f32 v7, v0, 0, 3, [m0] ; C81C0C00 v_interp_p2_f32 v7, [v7], v1, 0, 3, [m0] ; C81D0C01 v_interp_p1_f32 v8, v0, 1, 3, [m0] ; C8200D00 v_interp_p2_f32 v8, [v8], v1, 1, 3, [m0] ; C8210D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430104 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v9, 0xbeaaa64c ; 7E1202FF BEAAA64C s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v1, v1, v9 ; 06021301 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504 s_load_dwordx4 s[56:59], s[4:5], 0x8 ; C09C0508 s_load_dwordx8 s[48:55], s[6:7], 0x8 ; C0D80708 s_load_dwordx8 s[60:67], s[6:7], 0x10 ; C0DE0710 s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720 s_load_dwordx4 s[40:43], s[4:5], 0x10 ; C0940510 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s68, s[0:3], 0x14 ; C2220114 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[60:67], s[56:59] ; F0800700 01CF0904 image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[44:47] ; F0800700 016C0C02 image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[32:39], s[40:43] ; F0800700 01480F04 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800700 00A60102 s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800700 00431204 v_mov_b32_e32 v4, s68 ; 7E080244 s_buffer_load_dword s8, s[0:3], 0x1d ; C204011D s_buffer_load_dword s9, s[0:3], 0x1e ; C204811E s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A s_waitcnt vmcnt(2) ; BF8C0772 v_mad_f32 v4, s4, v15, v4 ; D2820004 04121E04 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mad_f32 v5, s4, v16, v5 ; D2820005 04162004 v_mov_b32_e32 v15, s6 ; 7E1E0206 v_mad_f32 v15, s4, v17, v15 ; D282000F 043E2204 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v12, s7, v12 ; 10181807 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_buffer_load_dword s5, s[0:3], 0x24 ; C2028124 s_buffer_load_dword s6, s[0:3], 0x25 ; C2030125 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, s8, v13 ; 101A1A08 v_mul_f32_e32 v14, s9, v14 ; 101C1C09 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 v_mul_f32_e32 v1, s10, v1 ; 1002020A s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s9, s[0:3], 0x13 ; C2048113 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111 s_buffer_load_dword s15, s[0:3], 0x12 ; C2078112 v_mul_f32_e32 v2, s11, v2 ; 1004040B v_mul_f32_e32 v3, s12, v3 ; 1006060C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v16, 1.0, s7 ; D2080010 00000EF2 v_mul_f32_e32 v16, v18, v16 ; 10202112 v_sub_f32_e64 v17, 1.0, s8 ; D2080011 000010F2 v_mul_f32_e32 v17, v19, v17 ; 10222313 v_sub_f32_e64 v18, 1.0, s10 ; D2080012 000014F2 v_mul_f32_e32 v18, v20, v18 ; 10242514 v_mov_b32_e32 v19, s13 ; 7E26020D v_mad_f32 v16, s9, v16, v19 ; D2820010 044E2009 v_mov_b32_e32 v19, s14 ; 7E26020E v_mad_f32 v17, s9, v17, v19 ; D2820011 044E2209 v_mov_b32_e32 v19, s15 ; 7E26020F v_mad_f32 v18, s9, v18, v19 ; D2820012 044E2409 v_mad_f32 v9, 2.0, v9, -1.0 ; D2820009 03CE12F4 v_mad_f32 v10, 2.0, v10, -1.0 ; D282000A 03CE14F4 v_mad_f32 v11, 2.0, v11, -1.0 ; D282000B 03CE16F4 v_mul_f32_e32 v19, v9, v9 ; 10261309 v_mad_f32 v19, v10, v10, v19 ; D2820013 044E150A v_mad_f32 v19, v11, v11, v19 ; D2820013 044E170B v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mul_f32_e32 v20, v7, v7 ; 10280F07 v_mad_f32 v20, v8, v8, v20 ; D2820014 04521108 v_mad_f32 v20, v0, v0, v20 ; D2820014 04520100 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_min_f32_e32 v19, 0x7f7fffff, v19 ; 1E2626FF 7F7FFFFF v_mul_f32_e32 v9, v19, v9 ; 10121313 v_mul_f32_e32 v10, v19, v10 ; 10141513 v_mul_f32_e32 v11, v19, v11 ; 10161713 v_min_f32_e32 v19, 0x7f7fffff, v20 ; 1E2628FF 7F7FFFFF v_mul_f32_e32 v20, v19, v7 ; 10280F13 v_mul_f32_e32 v20, v20, v9 ; 10281314 v_mul_f32_e32 v21, v19, v8 ; 102A1113 v_mad_f32 v20, v10, v21, v20 ; D2820014 04522B0A v_mul_f32_e32 v21, v19, v0 ; 102A0113 v_mad_f32 v20, v11, v21, v20 ; D2820014 04522B0B v_mul_f32_e32 v21, v9, v20 ; 102A2909 v_mad_f32 v21, v20, v9, v21 ; D2820015 04561314 v_mad_f32 v7, -v7, v19, v21 ; D2820007 24562707 v_mul_f32_e32 v21, v10, v20 ; 102A290A v_mad_f32 v21, v20, v10, v21 ; D2820015 04561514 v_mad_f32 v8, -v8, v19, v21 ; D2820008 24562708 v_mul_f32_e32 v21, v11, v20 ; 102A290B v_mad_f32 v20, v20, v11, v21 ; D2820014 04561714 v_mad_f32 v0, -v0, v19, v20 ; D2820000 24522700 v_mov_b32_e32 v19, 0x3f13cd3a ; 7E2602FF 3F13CD3A v_mad_f32 v20, v0, v19, 0 ; D2820014 02022700 v_mov_b32_e32 v21, 0x3f5105ec ; 7E2A02FF 3F5105EC v_mad_f32 v20, v8, v21, v20 ; D2820014 04522B08 v_mov_b32_e32 v22, 0xbf3504f3 ; 7E2C02FF BF3504F3 v_mul_f32_e32 v23, v22, v7 ; 102E0F16 v_mov_b32_e32 v24, 0xbed105ec ; 7E3002FF BED105EC v_mad_f32 v23, v8, v24, v23 ; D2820017 045E3108 v_mul_f32_e32 v8, v24, v8 ; 10101118 v_mad_f32 v23, v0, v19, v23 ; D2820017 045E2700 v_mad_f32 v0, v0, v19, v8 ; D2820000 04222700 v_mov_b32_e32 v8, 0x3f3504f3 ; 7E1002FF 3F3504F3 v_mad_f32 v0, v7, v8, v0 ; D2820000 04021107 v_add_f32_e64 v7, 0, v20 clamp ; D2060807 00022880 v_add_f32_e64 v20, 0, v23 clamp ; D2060814 00022E80 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max_f32_e32 v7, 0x358637bd, v7 ; 200E0EFF 358637BD v_max_f32_e32 v20, 0x358637bd, v20 ; 202828FF 358637BD v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_log_f32_e64 v7, |v7| ; D34E0107 00000107 v_log_f32_e64 v20, |v20| ; D34E0114 00000114 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mov_b32_e32 v23, 0xff7fffff ; 7E2E02FF FF7FFFFF v_max_f32_e32 v7, v23, v7 ; 200E0F17 v_max_f32_e32 v20, v23, v20 ; 20282917 v_max_f32_e32 v0, v23, v0 ; 20000117 v_add_f32_e64 v23, 1.0, s4 ; D2060017 000008F2 v_mul_f32_e32 v7, v23, v7 ; 100E0F17 v_mul_f32_e32 v20, v23, v20 ; 10282917 v_mul_f32_e32 v0, v23, v0 ; 10000117 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v7, v7, v12 ; 100E1907 v_mad_f32 v7, v13, v20, v7 ; D2820007 041E290D v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v0, v14, v0, v7 ; D2820000 041E010E s_buffer_load_dword s0, s[0:3], 0x26 ; C2000126 v_mul_f32_e32 v4, v1, v4 ; 10080901 v_mad_f32 v4, v0, v4, s7 ; D2820004 001E0900 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 v_mad_f32 v5, v0, v5, s8 ; D2820005 00220B00 v_mul_f32_e32 v7, v3, v15 ; 100E1F03 v_mad_f32 v0, v0, v7, s10 ; D2820000 002A0F00 v_mad_f32 v7, v11, v19, 0 ; D2820007 0202270B v_mad_f32 v7, v10, v21, v7 ; D2820007 041E2B0A v_mul_f32_e32 v15, v22, v9 ; 101E1316 v_mad_f32 v15, v10, v24, v15 ; D282000F 043E310A v_mul_f32_e32 v10, v24, v10 ; 10141518 v_mad_f32 v15, v11, v19, v15 ; D282000F 043E270B v_mad_f32 v10, v11, v19, v10 ; D282000A 042A270B v_mad_f32 v8, v9, v8, v10 ; D2820008 042A1109 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_max_f32_e32 v7, 0x358637bd, v7 ; 200E0EFF 358637BD v_mul_f32_e32 v7, v7, v12 ; 100E1907 v_add_f32_e64 v9, 0, v15 clamp ; D2060809 00021E80 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_max_f32_e32 v9, 0x358637bd, v9 ; 201212FF 358637BD v_mad_f32 v7, v13, v9, v7 ; D2820007 041E130D v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_mul_f32_e32 v8, v8, v8 ; 10101108 v_max_f32_e32 v8, 0x358637bd, v8 ; 201010FF 358637BD v_mad_f32 v7, v14, v8, v7 ; D2820007 041E110E v_mul_f32_e32 v1, v16, v1 ; 10020310 v_mad_f32 v1, v1, v7, v4 ; D2820001 04120F01 v_mad_f32 v1, v16, s5, v1 ; D2820001 04040B10 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mad_f32 v2, v2, v7, v5 ; D2820002 04160F02 v_mad_f32 v2, v17, s6, v2 ; D2820002 04080D11 v_mul_f32_e32 v3, v18, v3 ; 10060712 v_mad_f32 v0, v3, v7, v0 ; D2820000 04020F03 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v18, s0, v0 ; D2820000 04000112 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, 0x3b800000, v6 ; 10040CFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 28 Code Size: 1168 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[1], PERSPECTIVE DCL IN[4], TEXCOORD[5], PERSPECTIVE DCL IN[5], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL CONST[0..10] DCL TEMP[0..2] DCL TEMP[3], LOCAL DCL TEMP[4..8] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.8165, 0.5774, 0.0000, 0.0039} IMM[2] FLT32 { -0.7071, -0.4082, 0.5774, 0.7071} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[3], SAMP[6], 2D 1: MAD TEMP[0].xyz, TEMP[0], CONST[5].wwww, CONST[5] 2: TEX TEMP[1], IN[2], SAMP[0], 2D 3: MUL TEMP[1].xyz, TEMP[1], CONST[6] 4: MUL TEMP[0].xyz, TEMP[0], TEMP[1] 5: MOV TEMP[2].z, IMM[0].zzzz 6: ADD TEMP[0].w, TEMP[2].zzzz, CONST[9].xxxx 7: DP3 TEMP[3].x, IN[5], IN[5] 8: RSQ TEMP[3].x, TEMP[3].xxxx 9: MIN TEMP[3].x, IMM[3].xxxx, TEMP[3].xxxx 10: MUL TEMP[4].xyz, IN[5], TEMP[3].xxxx 11: TEX TEMP[5], IN[3], SAMP[3], 2D 12: MAD TEMP[2].xyw, TEMP[5].xyzz, IMM[0].xxxx, IMM[0].yyyy 13: DP3 TEMP[3].x, TEMP[2].xyww, TEMP[2].xyww 14: RSQ TEMP[3].x, TEMP[3].xxxx 15: MIN TEMP[3].x, IMM[3].xxxx, TEMP[3].xxxx 16: MUL TEMP[5].xyz, TEMP[2].xyww, TEMP[3].xxxx 17: DP3 TEMP[1].w, TEMP[5], TEMP[4] 18: MUL TEMP[2].xyw, TEMP[1].wwww, TEMP[5].xyzz 19: MAD TEMP[2].xyw, TEMP[2], IMM[0].xxxx, -TEMP[4].xyzz 20: DP2 TEMP[3].x, TEMP[2].ywzw, IMM[1] 21: ADD_SAT TEMP[4].x, IMM[1].zzzz, TEMP[3].xxxx 22: DP3_SAT TEMP[4].y, TEMP[2].xyww, IMM[2] 23: DP3_SAT TEMP[4].z, TEMP[2].ywxw, IMM[2].yzww 24: MAX TEMP[6].xyz, TEMP[4], IMM[0].wwww 25: LG2 TEMP[3].x, |TEMP[6].xxxx| 26: MAX TEMP[4].x, IMM[3].yyyy, TEMP[3].xxxx 27: LG2 TEMP[3].x, |TEMP[6].yyyy| 28: MAX TEMP[4].y, IMM[3].yyyy, TEMP[3].xxxx 29: LG2 TEMP[3].x, |TEMP[6].zzzz| 30: MAX TEMP[4].z, IMM[3].yyyy, TEMP[3].xxxx 31: MUL TEMP[4].xyz, TEMP[0].wwww, TEMP[4] 32: EX2 TEMP[6].x, TEMP[4].xxxx 33: EX2 TEMP[6].y, TEMP[4].yyyy 34: EX2 TEMP[6].z, TEMP[4].zzzz 35: TEX TEMP[4], IN[2], SAMP[1], 2D 36: MUL TEMP[4].xyz, TEMP[4], CONST[7] 37: DP3 TEMP[0].w, TEMP[4], TEMP[6] 38: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0] 39: DP3 TEMP[3].x, IN[1], IN[1] 40: RSQ TEMP[3].x, TEMP[3].xxxx 41: MIN TEMP[3].x, IMM[3].xxxx, TEMP[3].xxxx 42: MUL TEMP[6].xyz, IN[1], TEMP[3].xxxx 43: DP3 TEMP[3].x, IN[0], IN[0] 44: RSQ TEMP[3].x, TEMP[3].xxxx 45: MIN TEMP[3].x, IMM[3].xxxx, TEMP[3].xxxx 46: MUL TEMP[7].xyz, IN[0], TEMP[3].xxxx 47: MUL TEMP[8].xyz, TEMP[6].zxyw, TEMP[7].yzxw 48: MAD TEMP[8].xyz, TEMP[6].yzxw, TEMP[7].zxyw, -TEMP[8] 49: DP3 TEMP[6].z, TEMP[6], TEMP[2].xyww 50: DP3 TEMP[6].x, TEMP[7], TEMP[2].xyww 51: MUL TEMP[7].xyz, TEMP[8], IN[1].wwww 52: DP3 TEMP[6].y, TEMP[7], TEMP[2].xyww 53: TEX TEMP[6], TEMP[6], SAMP[2], CUBE 54: MUL TEMP[2].xyw, CONST[8].xyzz, CONST[8].wwww 55: MUL TEMP[2].xyw, TEMP[6].xyzz, TEMP[2] 56: TEX TEMP[6], IN[3], SAMP[4], 2D 57: TEX TEMP[7], IN[3], SAMP[5], 2D 58: MAD TEMP[2].xyw, TEMP[6].xxxx, TEMP[2], TEMP[7].xyzz 59: ADD TEMP[6].xyz, TEMP[2].zzzz, -CONST[0] 60: MUL TEMP[2].xyz, TEMP[2].xyww, TEMP[6] 61: MAD TEMP[2].xyz, TEMP[2], CONST[4].wwww, CONST[4] 62: MUL TEMP[1].xyz, TEMP[1], TEMP[2] 63: DP2 TEMP[3].x, TEMP[5].yzzw, IMM[1] 64: ADD_SAT TEMP[6].x, IMM[1].zzzz, TEMP[3].xxxx 65: DP3_SAT TEMP[6].y, TEMP[5], IMM[2] 66: DP3_SAT TEMP[6].z, TEMP[5].yzxw, IMM[2].yzww 67: MUL TEMP[5].xyz, TEMP[6], TEMP[6] 68: MAX TEMP[6].xyz, TEMP[5], IMM[0].wwww 69: DP3 TEMP[0].w, TEMP[4], TEMP[6] 70: MAD TEMP[0].xyz, TEMP[1], TEMP[0].wwww, TEMP[0] 71: ADD TEMP[0].xyz, TEMP[0], CONST[0] 72: MAD OUT[0].xyz, TEMP[2], CONST[10], TEMP[0] 73: MUL OUT[0].w, IMM[1].wwww, IN[4].wwww 74: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %49 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %72 = bitcast <8 x i32> addrspace(2)* %71 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %75 = bitcast <4 x i32> addrspace(2)* %74 to <16 x i8> addrspace(2)* %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %78 = bitcast <8 x i32> addrspace(2)* %77 to <32 x i8> addrspace(2)* %79 = load <32 x i8>, <32 x i8> addrspace(2)* %78, align 32, !tbaa !0 %80 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %81 = bitcast <4 x i32> addrspace(2)* %80 to <16 x i8> addrspace(2)* %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %84 = bitcast <8 x i32> addrspace(2)* %83 to <32 x i8> addrspace(2)* %85 = load <32 x i8>, <32 x i8> addrspace(2)* %84, align 32, !tbaa !0 %86 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %87 = bitcast <4 x i32> addrspace(2)* %86 to <16 x i8> addrspace(2)* %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %104 = bitcast float %98 to i32 %105 = bitcast float %99 to i32 %106 = insertelement <2 x i32> undef, i32 %104, i32 0 %107 = insertelement <2 x i32> %106, i32 %105, i32 1 %108 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %107, <32 x i8> %85, <16 x i8> %88, i32 2) %109 = extractelement <4 x float> %108, i32 0 %110 = extractelement <4 x float> %108, i32 1 %111 = extractelement <4 x float> %108, i32 2 %112 = fmul float %109, %34 %113 = fadd float %112, %31 %114 = fmul float %110, %34 %115 = fadd float %114, %32 %116 = fmul float %111, %34 %117 = fadd float %116, %33 %118 = bitcast float %96 to i32 %119 = bitcast float %97 to i32 %120 = insertelement <2 x i32> undef, i32 %118, i32 0 %121 = insertelement <2 x i32> %120, i32 %119, i32 1 %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %50, <16 x i8> %52, i32 2) %123 = extractelement <4 x float> %122, i32 0 %124 = extractelement <4 x float> %122, i32 1 %125 = extractelement <4 x float> %122, i32 2 %126 = fmul float %123, %35 %127 = fmul float %124, %36 %128 = fmul float %125, %37 %129 = fmul float %113, %126 %130 = fmul float %115, %127 %131 = fmul float %117, %128 %132 = fadd float %45, 1.000000e+00 %133 = fmul float %101, %101 %134 = fmul float %102, %102 %135 = fadd float %134, %133 %136 = fmul float %103, %103 %137 = fadd float %135, %136 %138 = call float @llvm.AMDGPU.rsq.clamped.f32(float %137) %139 = call float @llvm.minnum.f32(float %138, float 0x47EFFFFFE0000000) %140 = fmul float %101, %139 %141 = fmul float %102, %139 %142 = fmul float %103, %139 %143 = bitcast float %98 to i32 %144 = bitcast float %99 to i32 %145 = insertelement <2 x i32> undef, i32 %143, i32 0 %146 = insertelement <2 x i32> %145, i32 %144, i32 1 %147 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %67, <16 x i8> %70, i32 2) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 1 %150 = extractelement <4 x float> %147, i32 2 %151 = fmul float %148, 2.000000e+00 %152 = fadd float %151, -1.000000e+00 %153 = fmul float %149, 2.000000e+00 %154 = fadd float %153, -1.000000e+00 %155 = fmul float %150, 2.000000e+00 %156 = fadd float %155, -1.000000e+00 %157 = fmul float %152, %152 %158 = fmul float %154, %154 %159 = fadd float %158, %157 %160 = fmul float %156, %156 %161 = fadd float %159, %160 %162 = call float @llvm.AMDGPU.rsq.clamped.f32(float %161) %163 = call float @llvm.minnum.f32(float %162, float 0x47EFFFFFE0000000) %164 = fmul float %152, %163 %165 = fmul float %154, %163 %166 = fmul float %156, %163 %167 = fmul float %164, %140 %168 = fmul float %165, %141 %169 = fadd float %168, %167 %170 = fmul float %166, %142 %171 = fadd float %169, %170 %172 = fmul float %171, %164 %173 = fmul float %171, %165 %174 = fmul float %171, %166 %175 = fmul float %172, 2.000000e+00 %176 = fsub float %175, %140 %177 = fmul float %173, 2.000000e+00 %178 = fsub float %177, %141 %179 = fmul float %174, 2.000000e+00 %180 = fsub float %179, %142 %181 = fmul float %178, 0x3FEA20BD80000000 %182 = fmul float %180, 0x3FE279A740000000 %183 = fadd float %181, %182 %184 = fadd float %183, 0.000000e+00 %185 = call float @llvm.AMDIL.clamp.(float %184, float 0.000000e+00, float 1.000000e+00) %186 = fmul float %176, 0xBFE6A09E60000000 %187 = fmul float %178, 0xBFDA20BD80000000 %188 = fadd float %187, %186 %189 = fmul float %180, 0x3FE279A740000000 %190 = fadd float %188, %189 %191 = call float @llvm.AMDIL.clamp.(float %190, float 0.000000e+00, float 1.000000e+00) %192 = fmul float %178, 0xBFDA20BD80000000 %193 = fmul float %180, 0x3FE279A740000000 %194 = fadd float %193, %192 %195 = fmul float %176, 0x3FE6A09E60000000 %196 = fadd float %194, %195 %197 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00) %198 = call float @llvm.maxnum.f32(float %185, float 0x3EB0C6F7A0000000) %199 = call float @llvm.maxnum.f32(float %191, float 0x3EB0C6F7A0000000) %200 = call float @llvm.maxnum.f32(float %197, float 0x3EB0C6F7A0000000) %201 = call float @fabs(float %198) %202 = call float @llvm.log2.f32(float %201) %203 = call float @llvm.maxnum.f32(float %202, float 0xC7EFFFFFE0000000) %204 = call float @fabs(float %199) %205 = call float @llvm.log2.f32(float %204) %206 = call float @llvm.maxnum.f32(float %205, float 0xC7EFFFFFE0000000) %207 = call float @fabs(float %200) %208 = call float @llvm.log2.f32(float %207) %209 = call float @llvm.maxnum.f32(float %208, float 0xC7EFFFFFE0000000) %210 = fmul float %132, %203 %211 = fmul float %132, %206 %212 = fmul float %132, %209 %213 = call float @llvm.AMDIL.exp.(float %210) %214 = call float @llvm.AMDIL.exp.(float %211) %215 = call float @llvm.AMDIL.exp.(float %212) %216 = bitcast float %96 to i32 %217 = bitcast float %97 to i32 %218 = insertelement <2 x i32> undef, i32 %216, i32 0 %219 = insertelement <2 x i32> %218, i32 %217, i32 1 %220 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %219, <32 x i8> %55, <16 x i8> %58, i32 2) %221 = extractelement <4 x float> %220, i32 0 %222 = extractelement <4 x float> %220, i32 1 %223 = extractelement <4 x float> %220, i32 2 %224 = fmul float %221, %38 %225 = fmul float %222, %39 %226 = fmul float %223, %40 %227 = fmul float %224, %213 %228 = fmul float %225, %214 %229 = fadd float %228, %227 %230 = fmul float %226, %215 %231 = fadd float %229, %230 %232 = fmul float %231, %129 %233 = fmul float %231, %130 %234 = fmul float %231, %131 %235 = fmul float %92, %92 %236 = fmul float %93, %93 %237 = fadd float %236, %235 %238 = fmul float %94, %94 %239 = fadd float %237, %238 %240 = call float @llvm.AMDGPU.rsq.clamped.f32(float %239) %241 = call float @llvm.minnum.f32(float %240, float 0x47EFFFFFE0000000) %242 = fmul float %92, %241 %243 = fmul float %93, %241 %244 = fmul float %94, %241 %245 = fmul float %89, %89 %246 = fmul float %90, %90 %247 = fadd float %246, %245 %248 = fmul float %91, %91 %249 = fadd float %247, %248 %250 = call float @llvm.AMDGPU.rsq.clamped.f32(float %249) %251 = call float @llvm.minnum.f32(float %250, float 0x47EFFFFFE0000000) %252 = fmul float %89, %251 %253 = fmul float %90, %251 %254 = fmul float %91, %251 %255 = fmul float %244, %253 %256 = fmul float %242, %254 %257 = fmul float %243, %252 %258 = fmul float %243, %254 %259 = fsub float %258, %255 %260 = fmul float %244, %252 %261 = fsub float %260, %256 %262 = fmul float %242, %253 %263 = fsub float %262, %257 %264 = fmul float %242, %176 %265 = fmul float %243, %178 %266 = fadd float %265, %264 %267 = fmul float %244, %180 %268 = fadd float %266, %267 %269 = fmul float %252, %176 %270 = fmul float %253, %178 %271 = fadd float %270, %269 %272 = fmul float %254, %180 %273 = fadd float %271, %272 %274 = fmul float %259, %95 %275 = fmul float %261, %95 %276 = fmul float %263, %95 %277 = fmul float %274, %176 %278 = fmul float %275, %178 %279 = fadd float %278, %277 %280 = fmul float %276, %180 %281 = fadd float %279, %280 %282 = insertelement <4 x float> undef, float %273, i32 0 %283 = insertelement <4 x float> %282, float %281, i32 1 %284 = insertelement <4 x float> %283, float %268, i32 2 %285 = insertelement <4 x float> %284, float 0.000000e+00, i32 3 %286 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %285) %287 = extractelement <4 x float> %286, i32 0 %288 = extractelement <4 x float> %286, i32 1 %289 = extractelement <4 x float> %286, i32 2 %290 = extractelement <4 x float> %286, i32 3 %291 = call float @fabs(float %289) %292 = fdiv float 1.000000e+00, %291 %293 = fmul float %287, %292 %294 = fadd float %293, 1.500000e+00 %295 = fmul float %288, %292 %296 = fadd float %295, 1.500000e+00 %297 = bitcast float %296 to i32 %298 = bitcast float %294 to i32 %299 = bitcast float %290 to i32 %300 = insertelement <4 x i32> undef, i32 %297, i32 0 %301 = insertelement <4 x i32> %300, i32 %298, i32 1 %302 = insertelement <4 x i32> %301, i32 %299, i32 2 %303 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %302, <32 x i8> %61, <16 x i8> %64, i32 4) %304 = extractelement <4 x float> %303, i32 0 %305 = extractelement <4 x float> %303, i32 1 %306 = extractelement <4 x float> %303, i32 2 %307 = fmul float %41, %44 %308 = fmul float %42, %44 %309 = fmul float %43, %44 %310 = fmul float %304, %307 %311 = fmul float %305, %308 %312 = fmul float %306, %309 %313 = bitcast float %98 to i32 %314 = bitcast float %99 to i32 %315 = insertelement <2 x i32> undef, i32 %313, i32 0 %316 = insertelement <2 x i32> %315, i32 %314, i32 1 %317 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %316, <32 x i8> %73, <16 x i8> %76, i32 2) %318 = extractelement <4 x float> %317, i32 0 %319 = bitcast float %98 to i32 %320 = bitcast float %99 to i32 %321 = insertelement <2 x i32> undef, i32 %319, i32 0 %322 = insertelement <2 x i32> %321, i32 %320, i32 1 %323 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %322, <32 x i8> %79, <16 x i8> %82, i32 2) %324 = extractelement <4 x float> %323, i32 0 %325 = extractelement <4 x float> %323, i32 1 %326 = extractelement <4 x float> %323, i32 2 %327 = fmul float %318, %310 %328 = fadd float %327, %324 %329 = fmul float %318, %311 %330 = fadd float %329, %325 %331 = fmul float %318, %312 %332 = fadd float %331, %326 %333 = fsub float 1.000000e+00, %24 %334 = fsub float 1.000000e+00, %25 %335 = fsub float 1.000000e+00, %26 %336 = fmul float %328, %333 %337 = fmul float %330, %334 %338 = fmul float %332, %335 %339 = fmul float %336, %30 %340 = fadd float %339, %27 %341 = fmul float %337, %30 %342 = fadd float %341, %28 %343 = fmul float %338, %30 %344 = fadd float %343, %29 %345 = fmul float %126, %340 %346 = fmul float %127, %342 %347 = fmul float %128, %344 %348 = fmul float %165, 0x3FEA20BD80000000 %349 = fmul float %166, 0x3FE279A740000000 %350 = fadd float %348, %349 %351 = fadd float %350, 0.000000e+00 %352 = call float @llvm.AMDIL.clamp.(float %351, float 0.000000e+00, float 1.000000e+00) %353 = fmul float %164, 0xBFE6A09E60000000 %354 = fmul float %165, 0xBFDA20BD80000000 %355 = fadd float %354, %353 %356 = fmul float %166, 0x3FE279A740000000 %357 = fadd float %355, %356 %358 = call float @llvm.AMDIL.clamp.(float %357, float 0.000000e+00, float 1.000000e+00) %359 = fmul float %165, 0xBFDA20BD80000000 %360 = fmul float %166, 0x3FE279A740000000 %361 = fadd float %360, %359 %362 = fmul float %164, 0x3FE6A09E60000000 %363 = fadd float %361, %362 %364 = call float @llvm.AMDIL.clamp.(float %363, float 0.000000e+00, float 1.000000e+00) %365 = fmul float %352, %352 %366 = fmul float %358, %358 %367 = fmul float %364, %364 %368 = call float @llvm.maxnum.f32(float %365, float 0x3EB0C6F7A0000000) %369 = call float @llvm.maxnum.f32(float %366, float 0x3EB0C6F7A0000000) %370 = call float @llvm.maxnum.f32(float %367, float 0x3EB0C6F7A0000000) %371 = fmul float %224, %368 %372 = fmul float %225, %369 %373 = fadd float %372, %371 %374 = fmul float %226, %370 %375 = fadd float %373, %374 %376 = fmul float %345, %375 %377 = fadd float %376, %232 %378 = fmul float %346, %375 %379 = fadd float %378, %233 %380 = fmul float %347, %375 %381 = fadd float %380, %234 %382 = fadd float %377, %24 %383 = fadd float %379, %25 %384 = fadd float %381, %26 %385 = fmul float %340, %46 %386 = fadd float %385, %382 %387 = fmul float %342, %47 %388 = fadd float %387, %383 %389 = fmul float %344, %48 %390 = fadd float %389, %384 %391 = fmul float %100, 3.906250e-03 %392 = call i32 @llvm.SI.packf16(float %386, float %388) %393 = bitcast i32 %392 to float %394 = call i32 @llvm.SI.packf16(float %390, float %391) %395 = bitcast i32 %394 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %393, float %395, float %393, float %395) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[48:51], s[4:5], 0x0 ; C0980500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_mul_f32_e32 v9, v5, v5 ; 10120B05 v_mad_f32 v9, v6, v6, v9 ; D2820009 04260D06 v_mad_f32 v9, v7, v7, v9 ; D2820009 04260F07 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 3, 4, [m0] ; C8381300 v_interp_p2_f32 v14, [v14], v1, 3, 4, [m0] ; C8391301 v_interp_p1_f32 v15, v0, 0, 5, [m0] ; C83C1400 v_interp_p2_f32 v15, [v15], v1, 0, 5, [m0] ; C83D1401 v_interp_p1_f32 v16, v0, 1, 5, [m0] ; C8401500 v_interp_p2_f32 v16, [v16], v1, 1, 5, [m0] ; C8411501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx4 s[44:47], s[4:5], 0xc ; C096050C s_load_dwordx4 s[8:11], s[4:5], 0x10 ; C0840510 s_load_dwordx4 s[60:63], s[4:5], 0x18 ; C09E0518 s_load_dwordx8 s[64:71], s[6:7], 0x0 ; C0E00700 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718 s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720 s_load_dwordx8 s[72:79], s[6:7], 0x30 ; C0E40730 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[72:79], s[60:63] ; F0800700 01F2110C image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[64:71], s[48:51] ; F0800700 0190140A image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[52:59], s[44:47] ; F0800700 016D170C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, 2.0, v23, -1.0 ; D2820001 03CE2EF4 v_mad_f32 v23, 2.0, v24, -1.0 ; D2820017 03CE30F4 v_mad_f32 v24, 2.0, v25, -1.0 ; D2820018 03CE32F4 v_mul_f32_e32 v25, v1, v1 ; 10320301 v_mad_f32 v25, v23, v23, v25 ; D2820019 04662F17 v_mad_f32 v25, v24, v24, v25 ; D2820019 04663118 v_rsq_clamp_f32_e32 v25, v25 ; 7E325919 v_mul_f32_e32 v26, v15, v15 ; 10341F0F v_mad_f32 v26, v16, v16, v26 ; D282001A 046A2110 v_mad_f32 v26, v0, v0, v26 ; D282001A 046A0100 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_min_f32_e32 v25, 0x7f7fffff, v25 ; 1E3232FF 7F7FFFFF v_mul_f32_e32 v1, v25, v1 ; 10020319 v_mul_f32_e32 v23, v25, v23 ; 102E2F19 v_mul_f32_e32 v24, v25, v24 ; 10303119 v_min_f32_e32 v25, 0x7f7fffff, v26 ; 1E3234FF 7F7FFFFF v_mul_f32_e32 v26, v25, v15 ; 10341F19 v_mul_f32_e32 v26, v26, v1 ; 1034031A v_mul_f32_e32 v27, v25, v16 ; 10362119 v_mad_f32 v26, v23, v27, v26 ; D282001A 046A3717 v_mul_f32_e32 v27, v25, v0 ; 10360119 v_mad_f32 v26, v24, v27, v26 ; D282001A 046A3718 v_mul_f32_e32 v27, v1, v26 ; 10363501 v_mad_f32 v27, v26, v1, v27 ; D282001B 046E031A v_mad_f32 v15, -v15, v25, v27 ; D282000F 246E330F v_mul_f32_e32 v27, v23, v26 ; 10363517 v_mad_f32 v27, v26, v23, v27 ; D282001B 046E2F1A v_mad_f32 v16, -v16, v25, v27 ; D2820010 246E3310 v_mul_f32_e32 v27, v24, v26 ; 10363518 v_mad_f32 v26, v26, v24, v27 ; D282001A 046E311A v_mad_f32 v0, -v0, v25, v26 ; D2820000 246A3300 v_min_f32_e32 v9, 0x7f7fffff, v9 ; 1E1212FF 7F7FFFFF v_mul_f32_e32 v25, v2, v2 ; 10320502 v_mad_f32 v25, v3, v3, v25 ; D2820019 04660703 v_mad_f32 v25, v4, v4, v25 ; D2820019 04660904 v_rsq_clamp_f32_e32 v25, v25 ; 7E325919 v_mul_f32_e32 v5, v9, v5 ; 100A0B09 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_min_f32_e32 v9, 0x7f7fffff, v25 ; 1E1232FF 7F7FFFFF v_mul_f32_e32 v2, v9, v2 ; 10040509 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v9, v15, v5 ; 10120B0F v_mad_f32 v9, v6, v16, v9 ; D2820009 04262106 v_mov_b32_e32 v28, 0 ; 7E380280 v_mad_f32 v27, v7, v0, v9 ; D282001B 04260107 v_mul_f32_e32 v9, v4, v5 ; 10120B04 v_mad_f32 v9, v7, v2, -v9 ; D2820009 84260507 v_mul_f32_e32 v7, v3, v7 ; 100E0F03 v_mad_f32 v7, v6, v4, -v7 ; D2820007 841E0906 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v5, v5, v3, -v6 ; D2820005 841A0705 v_mul_f32_e32 v2, v15, v2 ; 1004050F v_mad_f32 v2, v3, v16, v2 ; D2820002 040A2103 v_mad_f32 v25, v4, v0, v2 ; D2820019 040A0104 v_mul_f32_e32 v2, v8, v7 ; 10040F08 v_mul_f32_e32 v3, v8, v9 ; 10061308 v_mul_f32_e32 v4, v8, v5 ; 10080B08 v_mul_f32_e32 v2, v15, v2 ; 1004050F v_mad_f32 v2, v3, v16, v2 ; D2820002 040A2103 v_mad_f32 v26, v4, v0, v2 ; D282001A 040A0104 v_cubeid_f32 v5, v25, v26, v27 ; D2880005 046E3519 v_cubema_f32 v4, v25, v26, v27 ; D28E0004 046E3519 v_cubesc_f32 v3, v25, v26, v27 ; D28A0003 046E3519 v_cubetc_f32 v2, v25, v26, v27 ; D28C0002 046E3519 image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[36:43], s[32:35] ; F0800700 0109060A s_buffer_load_dword s32, s[0:3], 0x23 ; C2100123 v_rcp_f32_e64 v9, |v4| ; D3540109 00000104 s_buffer_load_dword s33, s[0:3], 0x20 ; C2108120 s_buffer_load_dword s34, s[0:3], 0x21 ; C2110121 v_mov_b32_e32 v10, 0x3fc00000 ; 7E1402FF 3FC00000 v_mad_f32 v4, v2, v9, v10 ; D2820004 042A1302 v_mad_f32 v3, v3, v9, v10 ; D2820003 042A1303 s_buffer_load_dword s35, s[0:3], 0x22 ; C2118122 s_waitcnt vmcnt(0) ; BF8C0770 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[24:31], s[12:15] ; F0800700 00660203 s_buffer_load_dword s12, s[0:3], 0x24 ; C2060124 s_buffer_load_dword s13, s[0:3], 0x28 ; C2068128 s_buffer_load_dword s14, s[0:3], 0x29 ; C2070129 s_buffer_load_dword s15, s[0:3], 0x2a ; C207812A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s32 ; 7E0A0220 v_mul_f32_e32 v5, s33, v5 ; 100A0A21 v_mov_b32_e32 v9, s32 ; 7E120220 v_mul_f32_e32 v9, s34, v9 ; 10121222 s_load_dwordx4 s[24:27], s[4:5], 0x14 ; C08C0514 s_load_dwordx8 s[36:43], s[6:7], 0x28 ; C0D20728 v_mov_b32_e32 v10, s32 ; 7E140220 v_mul_f32_e32 v10, s35, v10 ; 10141423 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v10, v4 ; 1008090A image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[16:23], s[8:11] ; F0800100 0044050C s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[36:43], s[24:27] ; F0800700 00C9090C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v5, v2, v9 ; D2820002 04260505 v_mad_f32 v3, v5, v3, v10 ; D2820003 042A0705 s_buffer_load_dword s8, s[0:3], 0x18 ; C2040118 s_buffer_load_dword s9, s[0:3], 0x19 ; C2048119 s_buffer_load_dword s10, s[0:3], 0x1a ; C205011A s_buffer_load_dword s11, s[0:3], 0x1c ; C205811C s_buffer_load_dword s16, s[0:3], 0x1d ; C208011D s_buffer_load_dword s17, s[0:3], 0x1e ; C208811E v_mad_f32 v4, v5, v4, v11 ; D2820004 042E0905 v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mov_b32_e32 v9, s5 ; 7E120205 v_mov_b32_e32 v10, s6 ; 7E140206 v_mad_f32 v5, s7, v17, v5 ; D2820005 04162207 v_mad_f32 v9, s7, v18, v9 ; D2820009 04262407 v_mad_f32 v10, s7, v19, v10 ; D282000A 042A2607 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s8, v20 ; 10162808 v_mul_f32_e32 v12, s9, v21 ; 10182A09 v_mul_f32_e32 v13, s10, v22 ; 101A2C0A v_mul_f32_e32 v6, s11, v6 ; 100C0C0B v_mul_f32_e32 v7, s16, v7 ; 100E0E10 v_mul_f32_e32 v8, s17, v8 ; 10101011 v_mov_b32_e32 v17, 0x3f13cd3a ; 7E2202FF 3F13CD3A v_mad_f32 v18, v0, v17, 0 ; D2820012 02022300 v_mov_b32_e32 v19, 0x3f5105ec ; 7E2602FF 3F5105EC v_mad_f32 v18, v16, v19, v18 ; D2820012 044A2710 v_mov_b32_e32 v20, 0xbf3504f3 ; 7E2802FF BF3504F3 v_mul_f32_e32 v21, v20, v15 ; 102A1F14 v_mov_b32_e32 v22, 0xbed105ec ; 7E2C02FF BED105EC v_mad_f32 v21, v16, v22, v21 ; D2820015 04562D10 v_mul_f32_e32 v16, v22, v16 ; 10202116 v_mad_f32 v21, v0, v17, v21 ; D2820015 04562300 v_mad_f32 v0, v0, v17, v16 ; D2820000 04422300 v_mad_f32 v16, v24, v17, 0 ; D2820010 02022318 v_mad_f32 v16, v23, v19, v16 ; D2820010 04422717 v_mul_f32_e32 v19, v20, v1 ; 10260314 v_mad_f32 v19, v23, v22, v19 ; D2820013 044E2D17 v_mul_f32_e32 v20, v22, v23 ; 10282F16 v_mad_f32 v19, v24, v17, v19 ; D2820013 044E2318 v_mad_f32 v17, v24, v17, v20 ; D2820011 04522318 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s5, s[0:3], 0x13 ; C2028113 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_buffer_load_dword s0, s[0:3], 0x11 ; C2000111 v_mov_b32_e32 v20, 0x3f3504f3 ; 7E2802FF 3F3504F3 v_mad_f32 v0, v15, v20, v0 ; D2820000 0402290F v_mad_f32 v1, v1, v20, v17 ; D2820001 04462901 v_add_f32_e64 v15, 0, v18 clamp ; D206080F 00022480 v_add_f32_e64 v17, 0, v21 clamp ; D2060811 00022A80 v_max_f32_e32 v15, 0x358637bd, v15 ; 201E1EFF 358637BD v_max_f32_e32 v17, 0x358637bd, v17 ; 202222FF 358637BD v_log_f32_e64 v15, |v15| ; D34E010F 0000010F v_log_f32_e64 v17, |v17| ; D34E0111 00000111 v_add_f32_e64 v18, 1.0, s12 ; D2060012 000018F2 v_mov_b32_e32 v20, 0xff7fffff ; 7E2802FF FF7FFFFF v_max_f32_e32 v15, v20, v15 ; 201E1F14 v_max_f32_e32 v17, v20, v17 ; 20222314 v_mul_f32_e32 v15, v15, v18 ; 101E250F v_mul_f32_e32 v17, v17, v18 ; 10222511 v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v15, v15, v6 ; 101E0D0F v_mad_f32 v15, v7, v17, v15 ; D282000F 043E2307 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_mul_f32_e32 v16, v16, v16 ; 10202110 v_max_f32_e32 v16, 0x358637bd, v16 ; 202020FF 358637BD v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_add_f32_e64 v16, 0, v19 clamp ; D2060810 00022680 v_mul_f32_e32 v16, v16, v16 ; 10202110 v_max_f32_e32 v16, 0x358637bd, v16 ; 202020FF 358637BD v_mad_f32 v6, v7, v16, v6 ; D2820006 041A2107 v_max_f32_e32 v0, v20, v0 ; 20000114 v_mul_f32_e32 v0, v0, v18 ; 10002500 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v0, v8, v0, v15 ; D2820000 043E0108 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_max_f32_e32 v1, 0x358637bd, v1 ; 200202FF 358637BD v_mad_f32 v1, v8, v1, v6 ; D2820001 041A0308 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v6, 1.0, s6 ; D2080006 00000CF2 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_sub_f32_e64 v6, 1.0, s7 ; D2080006 00000EF2 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_sub_f32_e64 v6, 1.0, s8 ; D2080006 000010F2 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mov_b32_e32 v6, s9 ; 7E0C0209 v_mad_f32 v2, s5, v2, v6 ; D2820002 041A0405 v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mad_f32 v3, s5, v3, v6 ; D2820003 041A0605 v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v4, s5, v4, v6 ; D2820004 041A0805 v_mul_f32_e32 v5, v11, v5 ; 100A0B0B v_mad_f32 v5, v0, v5, s6 ; D2820005 001A0B00 v_mul_f32_e32 v6, v2, v11 ; 100C1702 v_mad_f32 v5, v6, v1, v5 ; D2820005 04160306 v_mul_f32_e32 v6, v12, v9 ; 100C130C v_mad_f32 v6, v0, v6, s7 ; D2820006 001E0D00 v_mul_f32_e32 v7, v3, v12 ; 100E1903 v_mad_f32 v6, v7, v1, v6 ; D2820006 041A0307 v_mul_f32_e32 v7, v13, v10 ; 100E150D v_mad_f32 v0, v0, v7, s8 ; D2820000 00220F00 v_mul_f32_e32 v7, v4, v13 ; 100E1B04 v_mad_f32 v0, v7, v1, v0 ; D2820000 04020307 v_mad_f32 v1, v2, s13, v5 ; D2820001 04141B02 v_mad_f32 v2, v3, s14, v6 ; D2820002 04181D03 v_mad_f32 v0, v4, s15, v0 ; D2820000 04001F04 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, 0x3b800000, v14 ; 10041CFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 32 Code Size: 1528 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[1], PERSPECTIVE DCL IN[4], TEXCOORD[5], PERSPECTIVE DCL IN[5], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL CONST[0..11] DCL TEMP[0..1] DCL TEMP[2], LOCAL DCL TEMP[3..7] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.8165, 0.5774, 0.0000, 0.0039} IMM[2] FLT32 { -0.7071, -0.4082, 0.5774, 0.7071} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xyz, CONST[9], CONST[9].wwww 1: TEX TEMP[1], IN[3], SAMP[5], 2D 2: MUL TEMP[0].xyz, TEMP[0], TEMP[1] 3: DP3 TEMP[2].x, IN[1], IN[1] 4: RSQ TEMP[2].x, TEMP[2].xxxx 5: MIN TEMP[2].x, IMM[3].xxxx, TEMP[2].xxxx 6: MUL TEMP[1].xyz, IN[1], TEMP[2].xxxx 7: DP3 TEMP[2].x, IN[0], IN[0] 8: RSQ TEMP[2].x, TEMP[2].xxxx 9: MIN TEMP[2].x, IMM[3].xxxx, TEMP[2].xxxx 10: MUL TEMP[3].xyz, IN[0], TEMP[2].xxxx 11: MUL TEMP[4].xyz, TEMP[1].zxyw, TEMP[3].yzxw 12: MAD TEMP[4].xyz, TEMP[1].yzxw, TEMP[3].zxyw, -TEMP[4] 13: MUL TEMP[4].xyz, TEMP[4], IN[1].wwww 14: DP3 TEMP[2].x, IN[5], IN[5] 15: RSQ TEMP[2].x, TEMP[2].xxxx 16: MIN TEMP[2].x, IMM[3].xxxx, TEMP[2].xxxx 17: MUL TEMP[5].xyz, IN[5], TEMP[2].xxxx 18: TEX TEMP[6], IN[3], SAMP[3], 2D 19: MAD TEMP[6].xyz, TEMP[6], IMM[0].xxxx, IMM[0].yyyy 20: DP3 TEMP[2].x, TEMP[6], TEMP[6] 21: RSQ TEMP[2].x, TEMP[2].xxxx 22: MIN TEMP[2].x, IMM[3].xxxx, TEMP[2].xxxx 23: MUL TEMP[7].xyz, TEMP[6], TEMP[2].xxxx 24: DP3 TEMP[0].w, TEMP[7], TEMP[5] 25: MUL TEMP[6].xyz, TEMP[0].wwww, TEMP[7] 26: MAD TEMP[5].xyz, TEMP[6], IMM[0].xxxx, -TEMP[5] 27: DP3 TEMP[4].y, TEMP[4], TEMP[5] 28: DP3 TEMP[4].x, TEMP[3], TEMP[5] 29: DP3 TEMP[4].z, TEMP[1], TEMP[5] 30: TEX TEMP[1], TEMP[4], SAMP[2], CUBE 31: MUL TEMP[3].xyz, CONST[8], CONST[8].wwww 32: MUL TEMP[1].xyz, TEMP[1], TEMP[3] 33: TEX TEMP[3], IN[3], SAMP[4], 2D 34: MAD TEMP[0].xyz, TEMP[3].xxxx, TEMP[1], TEMP[0] 35: MOV TEMP[1].z, IMM[0].zzzz 36: ADD TEMP[1].xyw, TEMP[1].zzzz, -CONST[0].xyzz 37: MUL TEMP[0].xyz, TEMP[0], TEMP[1].xyww 38: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 39: TEX TEMP[3], IN[2], SAMP[0], 2D 40: MUL TEMP[1].xyw, TEMP[3].xyzz, CONST[6].xyzz 41: MUL TEMP[3].xyz, TEMP[0], TEMP[1].xyww 42: TEX TEMP[4], IN[3], SAMP[6], 2D 43: MAD TEMP[4].xyz, TEMP[4], CONST[5].wwww, CONST[5] 44: MUL TEMP[1].xyw, TEMP[1], TEMP[4].xyzz 45: DP2 TEMP[2].x, TEMP[5].yzzw, IMM[1] 46: ADD_SAT TEMP[4].x, IMM[1].zzzz, TEMP[2].xxxx 47: DP3_SAT TEMP[4].y, TEMP[5], IMM[2] 48: DP3_SAT TEMP[4].z, TEMP[5].yzxw, IMM[2].yzww 49: MAX TEMP[5].xyz, TEMP[4], IMM[0].wwww 50: LG2 TEMP[2].x, |TEMP[5].xxxx| 51: MAX TEMP[4].x, IMM[3].yyyy, TEMP[2].xxxx 52: LG2 TEMP[2].x, |TEMP[5].yyyy| 53: MAX TEMP[4].y, IMM[3].yyyy, TEMP[2].xxxx 54: LG2 TEMP[2].x, |TEMP[5].zzzz| 55: MAX TEMP[4].z, IMM[3].yyyy, TEMP[2].xxxx 56: ADD TEMP[0].w, TEMP[1].zzzz, CONST[10].zzzz 57: MUL TEMP[4].xyz, TEMP[4], TEMP[0].wwww 58: EX2 TEMP[5].x, TEMP[4].xxxx 59: EX2 TEMP[5].y, TEMP[4].yyyy 60: EX2 TEMP[5].z, TEMP[4].zzzz 61: TEX TEMP[4], IN[2], SAMP[1], 2D 62: MUL TEMP[4].xyz, TEMP[4], CONST[7] 63: DP3 TEMP[0].w, TEMP[4], TEMP[5] 64: MUL TEMP[1].xyz, TEMP[0].wwww, TEMP[1].xyww 65: DP2 TEMP[2].x, TEMP[7].yzzw, IMM[1] 66: ADD_SAT TEMP[5].x, IMM[1].zzzz, TEMP[2].xxxx 67: DP3_SAT TEMP[5].y, TEMP[7], IMM[2] 68: DP3_SAT TEMP[5].z, TEMP[7].yzxw, IMM[2].yzww 69: MUL TEMP[5].xyz, TEMP[5], TEMP[5] 70: MAX TEMP[6].xyz, TEMP[5], IMM[0].wwww 71: DP3 TEMP[0].w, TEMP[4], TEMP[6] 72: MAD TEMP[1].xyz, TEMP[3], TEMP[0].wwww, TEMP[1] 73: ADD TEMP[1].xyz, TEMP[1], CONST[0] 74: MAD OUT[0].xyz, TEMP[0], CONST[11], TEMP[1] 75: MUL OUT[0].w, IMM[1].wwww, IN[4].wwww 76: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %76 = bitcast <8 x i32> addrspace(2)* %75 to <32 x i8> addrspace(2)* %77 = load <32 x i8>, <32 x i8> addrspace(2)* %76, align 32, !tbaa !0 %78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %79 = bitcast <4 x i32> addrspace(2)* %78 to <16 x i8> addrspace(2)* %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %82 = bitcast <8 x i32> addrspace(2)* %81 to <32 x i8> addrspace(2)* %83 = load <32 x i8>, <32 x i8> addrspace(2)* %82, align 32, !tbaa !0 %84 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %85 = bitcast <4 x i32> addrspace(2)* %84 to <16 x i8> addrspace(2)* %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)* %89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)* %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %108 = fmul float %45, %48 %109 = fmul float %46, %48 %110 = fmul float %47, %48 %111 = bitcast float %102 to i32 %112 = bitcast float %103 to i32 %113 = insertelement <2 x i32> undef, i32 %111, i32 0 %114 = insertelement <2 x i32> %113, i32 %112, i32 1 %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %114, <32 x i8> %83, <16 x i8> %86, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = extractelement <4 x float> %115, i32 1 %118 = extractelement <4 x float> %115, i32 2 %119 = fmul float %108, %116 %120 = fmul float %109, %117 %121 = fmul float %110, %118 %122 = fmul float %96, %96 %123 = fmul float %97, %97 %124 = fadd float %123, %122 %125 = fmul float %98, %98 %126 = fadd float %124, %125 %127 = call float @llvm.AMDGPU.rsq.clamped.f32(float %126) %128 = call float @llvm.minnum.f32(float %127, float 0x47EFFFFFE0000000) %129 = fmul float %96, %128 %130 = fmul float %97, %128 %131 = fmul float %98, %128 %132 = fmul float %93, %93 %133 = fmul float %94, %94 %134 = fadd float %133, %132 %135 = fmul float %95, %95 %136 = fadd float %134, %135 %137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136) %138 = call float @llvm.minnum.f32(float %137, float 0x47EFFFFFE0000000) %139 = fmul float %93, %138 %140 = fmul float %94, %138 %141 = fmul float %95, %138 %142 = fmul float %131, %140 %143 = fmul float %129, %141 %144 = fmul float %130, %139 %145 = fmul float %130, %141 %146 = fsub float %145, %142 %147 = fmul float %131, %139 %148 = fsub float %147, %143 %149 = fmul float %129, %140 %150 = fsub float %149, %144 %151 = fmul float %146, %99 %152 = fmul float %148, %99 %153 = fmul float %150, %99 %154 = fmul float %105, %105 %155 = fmul float %106, %106 %156 = fadd float %155, %154 %157 = fmul float %107, %107 %158 = fadd float %156, %157 %159 = call float @llvm.AMDGPU.rsq.clamped.f32(float %158) %160 = call float @llvm.minnum.f32(float %159, float 0x47EFFFFFE0000000) %161 = fmul float %105, %160 %162 = fmul float %106, %160 %163 = fmul float %107, %160 %164 = bitcast float %102 to i32 %165 = bitcast float %103 to i32 %166 = insertelement <2 x i32> undef, i32 %164, i32 0 %167 = insertelement <2 x i32> %166, i32 %165, i32 1 %168 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %167, <32 x i8> %71, <16 x i8> %74, i32 2) %169 = extractelement <4 x float> %168, i32 0 %170 = extractelement <4 x float> %168, i32 1 %171 = extractelement <4 x float> %168, i32 2 %172 = fmul float %169, 2.000000e+00 %173 = fadd float %172, -1.000000e+00 %174 = fmul float %170, 2.000000e+00 %175 = fadd float %174, -1.000000e+00 %176 = fmul float %171, 2.000000e+00 %177 = fadd float %176, -1.000000e+00 %178 = fmul float %173, %173 %179 = fmul float %175, %175 %180 = fadd float %179, %178 %181 = fmul float %177, %177 %182 = fadd float %180, %181 %183 = call float @llvm.AMDGPU.rsq.clamped.f32(float %182) %184 = call float @llvm.minnum.f32(float %183, float 0x47EFFFFFE0000000) %185 = fmul float %173, %184 %186 = fmul float %175, %184 %187 = fmul float %177, %184 %188 = fmul float %185, %161 %189 = fmul float %186, %162 %190 = fadd float %189, %188 %191 = fmul float %187, %163 %192 = fadd float %190, %191 %193 = fmul float %192, %185 %194 = fmul float %192, %186 %195 = fmul float %192, %187 %196 = fmul float %193, 2.000000e+00 %197 = fsub float %196, %161 %198 = fmul float %194, 2.000000e+00 %199 = fsub float %198, %162 %200 = fmul float %195, 2.000000e+00 %201 = fsub float %200, %163 %202 = fmul float %151, %197 %203 = fmul float %152, %199 %204 = fadd float %203, %202 %205 = fmul float %153, %201 %206 = fadd float %204, %205 %207 = fmul float %139, %197 %208 = fmul float %140, %199 %209 = fadd float %208, %207 %210 = fmul float %141, %201 %211 = fadd float %209, %210 %212 = fmul float %129, %197 %213 = fmul float %130, %199 %214 = fadd float %213, %212 %215 = fmul float %131, %201 %216 = fadd float %214, %215 %217 = insertelement <4 x float> undef, float %211, i32 0 %218 = insertelement <4 x float> %217, float %206, i32 1 %219 = insertelement <4 x float> %218, float %216, i32 2 %220 = insertelement <4 x float> %219, float 0.000000e+00, i32 3 %221 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %220) %222 = extractelement <4 x float> %221, i32 0 %223 = extractelement <4 x float> %221, i32 1 %224 = extractelement <4 x float> %221, i32 2 %225 = extractelement <4 x float> %221, i32 3 %226 = call float @fabs(float %224) %227 = fdiv float 1.000000e+00, %226 %228 = fmul float %222, %227 %229 = fadd float %228, 1.500000e+00 %230 = fmul float %223, %227 %231 = fadd float %230, 1.500000e+00 %232 = bitcast float %231 to i32 %233 = bitcast float %229 to i32 %234 = bitcast float %225 to i32 %235 = insertelement <4 x i32> undef, i32 %232, i32 0 %236 = insertelement <4 x i32> %235, i32 %233, i32 1 %237 = insertelement <4 x i32> %236, i32 %234, i32 2 %238 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %237, <32 x i8> %65, <16 x i8> %68, i32 4) %239 = extractelement <4 x float> %238, i32 0 %240 = extractelement <4 x float> %238, i32 1 %241 = extractelement <4 x float> %238, i32 2 %242 = fmul float %41, %44 %243 = fmul float %42, %44 %244 = fmul float %43, %44 %245 = fmul float %239, %242 %246 = fmul float %240, %243 %247 = fmul float %241, %244 %248 = bitcast float %102 to i32 %249 = bitcast float %103 to i32 %250 = insertelement <2 x i32> undef, i32 %248, i32 0 %251 = insertelement <2 x i32> %250, i32 %249, i32 1 %252 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %251, <32 x i8> %77, <16 x i8> %80, i32 2) %253 = extractelement <4 x float> %252, i32 0 %254 = fmul float %253, %245 %255 = fadd float %254, %119 %256 = fmul float %253, %246 %257 = fadd float %256, %120 %258 = fmul float %253, %247 %259 = fadd float %258, %121 %260 = fsub float 1.000000e+00, %24 %261 = fsub float 1.000000e+00, %25 %262 = fsub float 1.000000e+00, %26 %263 = fmul float %255, %260 %264 = fmul float %257, %261 %265 = fmul float %259, %262 %266 = fmul float %263, %30 %267 = fadd float %266, %27 %268 = fmul float %264, %30 %269 = fadd float %268, %28 %270 = fmul float %265, %30 %271 = fadd float %270, %29 %272 = bitcast float %100 to i32 %273 = bitcast float %101 to i32 %274 = insertelement <2 x i32> undef, i32 %272, i32 0 %275 = insertelement <2 x i32> %274, i32 %273, i32 1 %276 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %275, <32 x i8> %54, <16 x i8> %56, i32 2) %277 = extractelement <4 x float> %276, i32 0 %278 = extractelement <4 x float> %276, i32 1 %279 = extractelement <4 x float> %276, i32 2 %280 = fmul float %277, %35 %281 = fmul float %278, %36 %282 = fmul float %279, %37 %283 = fmul float %267, %280 %284 = fmul float %269, %281 %285 = fmul float %271, %282 %286 = bitcast float %102 to i32 %287 = bitcast float %103 to i32 %288 = insertelement <2 x i32> undef, i32 %286, i32 0 %289 = insertelement <2 x i32> %288, i32 %287, i32 1 %290 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %289, <32 x i8> %89, <16 x i8> %92, i32 2) %291 = extractelement <4 x float> %290, i32 0 %292 = extractelement <4 x float> %290, i32 1 %293 = extractelement <4 x float> %290, i32 2 %294 = fmul float %291, %34 %295 = fadd float %294, %31 %296 = fmul float %292, %34 %297 = fadd float %296, %32 %298 = fmul float %293, %34 %299 = fadd float %298, %33 %300 = fmul float %280, %295 %301 = fmul float %281, %297 %302 = fmul float %282, %299 %303 = fmul float %199, 0x3FEA20BD80000000 %304 = fmul float %201, 0x3FE279A740000000 %305 = fadd float %303, %304 %306 = fadd float %305, 0.000000e+00 %307 = call float @llvm.AMDIL.clamp.(float %306, float 0.000000e+00, float 1.000000e+00) %308 = fmul float %197, 0xBFE6A09E60000000 %309 = fmul float %199, 0xBFDA20BD80000000 %310 = fadd float %309, %308 %311 = fmul float %201, 0x3FE279A740000000 %312 = fadd float %310, %311 %313 = call float @llvm.AMDIL.clamp.(float %312, float 0.000000e+00, float 1.000000e+00) %314 = fmul float %199, 0xBFDA20BD80000000 %315 = fmul float %201, 0x3FE279A740000000 %316 = fadd float %315, %314 %317 = fmul float %197, 0x3FE6A09E60000000 %318 = fadd float %316, %317 %319 = call float @llvm.AMDIL.clamp.(float %318, float 0.000000e+00, float 1.000000e+00) %320 = call float @llvm.maxnum.f32(float %307, float 0x3EB0C6F7A0000000) %321 = call float @llvm.maxnum.f32(float %313, float 0x3EB0C6F7A0000000) %322 = call float @llvm.maxnum.f32(float %319, float 0x3EB0C6F7A0000000) %323 = call float @fabs(float %320) %324 = call float @llvm.log2.f32(float %323) %325 = call float @llvm.maxnum.f32(float %324, float 0xC7EFFFFFE0000000) %326 = call float @fabs(float %321) %327 = call float @llvm.log2.f32(float %326) %328 = call float @llvm.maxnum.f32(float %327, float 0xC7EFFFFFE0000000) %329 = call float @fabs(float %322) %330 = call float @llvm.log2.f32(float %329) %331 = call float @llvm.maxnum.f32(float %330, float 0xC7EFFFFFE0000000) %332 = fadd float %49, 1.000000e+00 %333 = fmul float %325, %332 %334 = fmul float %328, %332 %335 = fmul float %331, %332 %336 = call float @llvm.AMDIL.exp.(float %333) %337 = call float @llvm.AMDIL.exp.(float %334) %338 = call float @llvm.AMDIL.exp.(float %335) %339 = bitcast float %100 to i32 %340 = bitcast float %101 to i32 %341 = insertelement <2 x i32> undef, i32 %339, i32 0 %342 = insertelement <2 x i32> %341, i32 %340, i32 1 %343 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %342, <32 x i8> %59, <16 x i8> %62, i32 2) %344 = extractelement <4 x float> %343, i32 0 %345 = extractelement <4 x float> %343, i32 1 %346 = extractelement <4 x float> %343, i32 2 %347 = fmul float %344, %38 %348 = fmul float %345, %39 %349 = fmul float %346, %40 %350 = fmul float %347, %336 %351 = fmul float %348, %337 %352 = fadd float %351, %350 %353 = fmul float %349, %338 %354 = fadd float %352, %353 %355 = fmul float %354, %300 %356 = fmul float %354, %301 %357 = fmul float %354, %302 %358 = fmul float %186, 0x3FEA20BD80000000 %359 = fmul float %187, 0x3FE279A740000000 %360 = fadd float %358, %359 %361 = fadd float %360, 0.000000e+00 %362 = call float @llvm.AMDIL.clamp.(float %361, float 0.000000e+00, float 1.000000e+00) %363 = fmul float %185, 0xBFE6A09E60000000 %364 = fmul float %186, 0xBFDA20BD80000000 %365 = fadd float %364, %363 %366 = fmul float %187, 0x3FE279A740000000 %367 = fadd float %365, %366 %368 = call float @llvm.AMDIL.clamp.(float %367, float 0.000000e+00, float 1.000000e+00) %369 = fmul float %186, 0xBFDA20BD80000000 %370 = fmul float %187, 0x3FE279A740000000 %371 = fadd float %370, %369 %372 = fmul float %185, 0x3FE6A09E60000000 %373 = fadd float %371, %372 %374 = call float @llvm.AMDIL.clamp.(float %373, float 0.000000e+00, float 1.000000e+00) %375 = fmul float %362, %362 %376 = fmul float %368, %368 %377 = fmul float %374, %374 %378 = call float @llvm.maxnum.f32(float %375, float 0x3EB0C6F7A0000000) %379 = call float @llvm.maxnum.f32(float %376, float 0x3EB0C6F7A0000000) %380 = call float @llvm.maxnum.f32(float %377, float 0x3EB0C6F7A0000000) %381 = fmul float %347, %378 %382 = fmul float %348, %379 %383 = fadd float %382, %381 %384 = fmul float %349, %380 %385 = fadd float %383, %384 %386 = fmul float %283, %385 %387 = fadd float %386, %355 %388 = fmul float %284, %385 %389 = fadd float %388, %356 %390 = fmul float %285, %385 %391 = fadd float %390, %357 %392 = fadd float %387, %24 %393 = fadd float %389, %25 %394 = fadd float %391, %26 %395 = fmul float %267, %50 %396 = fadd float %395, %392 %397 = fmul float %269, %51 %398 = fadd float %397, %393 %399 = fmul float %271, %52 %400 = fadd float %399, %394 %401 = fmul float %104, 3.906250e-03 %402 = call i32 @llvm.SI.packf16(float %396, float %398) %403 = bitcast i32 %402 to float %404 = call i32 @llvm.SI.packf16(float %400, float %401) %405 = bitcast i32 %404 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %403, float %405, float %403, float %405) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 3, 4, [m0] ; C8341300 v_interp_p2_f32 v13, [v13], v1, 3, 4, [m0] ; C8351301 v_interp_p1_f32 v14, v0, 0, 5, [m0] ; C8381400 v_interp_p2_f32 v14, [v14], v1, 0, 5, [m0] ; C8391401 v_interp_p1_f32 v15, v0, 1, 5, [m0] ; C83C1500 v_interp_p2_f32 v15, [v15], v1, 1, 5, [m0] ; C83D1501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_load_dwordx4 s[24:27], s[4:5], 0x14 ; C08C0514 s_load_dwordx4 s[12:15], s[4:5], 0x18 ; C0860518 s_load_dwordx8 s[28:35], s[6:7], 0x28 ; C0CE0728 s_load_dwordx4 s[36:39], s[4:5], 0xc ; C092050C s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718 s_load_dwordx8 s[16:23], s[6:7], 0x30 ; C0C80730 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[28:35], s[24:27] ; F0800700 00C7100B s_load_dwordx8 s[24:31], s[6:7], 0x20 ; C0CC0720 s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510 image_sample v[19:21], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[40:47], s[36:39] ; F0800700 012A130B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v1, 2.0, v19, -1.0 ; D2820001 03CE26F4 v_mad_f32 v19, 2.0, v20, -1.0 ; D2820013 03CE28F4 v_mad_f32 v20, 2.0, v21, -1.0 ; D2820014 03CE2AF4 v_mul_f32_e32 v21, v1, v1 ; 102A0301 v_mad_f32 v21, v19, v19, v21 ; D2820015 04562713 v_mad_f32 v21, v20, v20, v21 ; D2820015 04562914 v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v22, v14, v14 ; 102C1D0E v_mad_f32 v22, v15, v15, v22 ; D2820016 045A1F0F v_mad_f32 v22, v0, v0, v22 ; D2820016 045A0100 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_min_f32_e32 v21, 0x7f7fffff, v21 ; 1E2A2AFF 7F7FFFFF v_mul_f32_e32 v1, v21, v1 ; 10020315 v_mul_f32_e32 v19, v21, v19 ; 10262715 v_mul_f32_e32 v20, v21, v20 ; 10282915 v_min_f32_e32 v21, 0x7f7fffff, v22 ; 1E2A2CFF 7F7FFFFF v_mul_f32_e32 v22, v21, v14 ; 102C1D15 v_mul_f32_e32 v22, v22, v1 ; 102C0316 v_mul_f32_e32 v23, v21, v15 ; 102E1F15 v_mad_f32 v22, v19, v23, v22 ; D2820016 045A2F13 v_mul_f32_e32 v23, v21, v0 ; 102E0115 v_mad_f32 v22, v20, v23, v22 ; D2820016 045A2F14 v_mul_f32_e32 v23, v1, v22 ; 102E2D01 v_mad_f32 v23, v22, v1, v23 ; D2820017 045E0316 v_mad_f32 v14, -v14, v21, v23 ; D282000E 245E2B0E v_mul_f32_e32 v23, v19, v22 ; 102E2D13 v_mad_f32 v23, v22, v19, v23 ; D2820017 045E2716 v_mad_f32 v15, -v15, v21, v23 ; D282000F 245E2B0F v_mul_f32_e32 v23, v5, v5 ; 102E0B05 v_mad_f32 v23, v6, v6, v23 ; D2820017 045E0D06 v_mad_f32 v23, v7, v7, v23 ; D2820017 045E0F07 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v24, v20, v22 ; 10302D14 v_mad_f32 v22, v22, v20, v24 ; D2820016 04622916 v_mad_f32 v0, -v0, v21, v22 ; D2820000 245A2B00 v_min_f32_e32 v21, 0x7f7fffff, v23 ; 1E2A2EFF 7F7FFFFF v_mul_f32_e32 v22, v2, v2 ; 102C0502 v_mad_f32 v22, v3, v3, v22 ; D2820016 045A0703 v_mad_f32 v22, v4, v4, v22 ; D2820016 045A0904 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v5, v21, v5 ; 100A0B15 v_mul_f32_e32 v6, v21, v6 ; 100C0D15 v_mul_f32_e32 v7, v21, v7 ; 100E0F15 v_min_f32_e32 v21, 0x7f7fffff, v22 ; 1E2A2CFF 7F7FFFFF v_mul_f32_e32 v2, v21, v2 ; 10040515 v_mul_f32_e32 v3, v21, v3 ; 10060715 v_mul_f32_e32 v4, v21, v4 ; 10080915 v_mul_f32_e32 v21, v3, v7 ; 102A0F03 v_mad_f32 v21, v6, v4, -v21 ; D2820015 84560906 v_mul_f32_e32 v22, v4, v5 ; 102C0B04 v_mad_f32 v22, v7, v2, -v22 ; D2820016 845A0507 v_mul_f32_e32 v23, v2, v6 ; 102E0D02 v_mad_f32 v23, v5, v3, -v23 ; D2820017 845E0705 v_mul_f32_e32 v21, v8, v21 ; 102A2B08 v_mul_f32_e32 v22, v8, v22 ; 102C2D08 v_mul_f32_e32 v8, v8, v23 ; 10102F08 v_mul_f32_e32 v21, v14, v21 ; 102A2B0E v_mad_f32 v21, v22, v15, v21 ; D2820015 04561F16 v_mul_f32_e32 v2, v14, v2 ; 1004050E v_mad_f32 v2, v3, v15, v2 ; D2820002 040A1F03 v_mad_f32 v22, v8, v0, v21 ; D2820016 04560108 v_mad_f32 v21, v4, v0, v2 ; D2820015 040A0104 v_mul_f32_e32 v2, v14, v5 ; 10040B0E v_mad_f32 v2, v6, v15, v2 ; D2820002 040A1F06 v_mad_f32 v23, v7, v0, v2 ; D2820017 040A0107 v_mov_b32_e32 v24, 0 ; 7E300280 v_cubeid_f32 v27, v21, v22, v23 ; D288001B 045E2D15 v_cubema_f32 v26, v21, v22, v23 ; D28E001A 045E2D15 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 v_cubesc_f32 v25, v21, v22, v23 ; D28A0019 045E2D15 v_cubetc_f32 v24, v21, v22, v23 ; D28C0018 045E2D15 v_rcp_f32_e64 v2, |v26| ; D3540102 0000011A s_load_dwordx8 s[48:55], s[6:7], 0x0 ; C0D80700 v_mov_b32_e32 v3, 0x3fc00000 ; 7E0602FF 3FC00000 v_mad_f32 v26, v24, v2, v3 ; D282001A 040E0518 v_mad_f32 v25, v25, v2, v3 ; D2820019 040E0519 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[36:43], s[44:47] ; F0800700 01690219 image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[24:31], s[32:35] ; F0800100 0106050B s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[48:55], s[8:11] ; F0800700 004C0609 image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800700 0064150B s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_buffer_load_dword s6, s[0:3], 0x15 ; C2030115 s_buffer_load_dword s7, s[0:3], 0x16 ; C2038116 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126 s_buffer_load_dword s11, s[0:3], 0x27 ; C2058127 s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 s_buffer_load_dword s14, s[0:3], 0x1a ; C207011A s_buffer_load_dword s15, s[0:3], 0x1c ; C207811C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v11, s5 ; 7E160205 v_mad_f32 v11, s4, v21, v11 ; D282000B 042E2A04 v_mov_b32_e32 v12, s6 ; 7E180206 v_mad_f32 v12, s4, v22, v12 ; D282000C 04322C04 s_buffer_load_dword s5, s[0:3], 0x23 ; C2028123 s_buffer_load_dword s6, s[0:3], 0x20 ; C2030120 s_buffer_load_dword s16, s[0:3], 0x21 ; C2080121 s_buffer_load_dword s17, s[0:3], 0x22 ; C2088122 v_mov_b32_e32 v21, s7 ; 7E2A0207 v_mad_f32 v21, s4, v23, v21 ; D2820015 04562E04 v_mov_b32_e32 v22, s11 ; 7E2C020B v_mul_f32_e32 v22, s8, v22 ; 102C2C08 v_mov_b32_e32 v23, s11 ; 7E2E020B v_mul_f32_e32 v23, s9, v23 ; 102E2E09 v_mov_b32_e32 v24, s11 ; 7E30020B v_mul_f32_e32 v24, s10, v24 ; 1030300A v_mul_f32_e32 v16, v16, v22 ; 10202D10 v_mul_f32_e32 v17, v17, v23 ; 10222F11 v_mul_f32_e32 v18, v18, v24 ; 10243112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v22, s5 ; 7E2C0205 v_mul_f32_e32 v22, s6, v22 ; 102C2C06 v_mov_b32_e32 v23, s5 ; 7E2E0205 v_mul_f32_e32 v23, s16, v23 ; 102E2E10 v_mov_b32_e32 v24, s5 ; 7E300205 v_mul_f32_e32 v24, s17, v24 ; 10303011 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_buffer_load_dword s5, s[0:3], 0x1e ; C202811E v_mul_f32_e32 v2, v22, v2 ; 10040516 v_mul_f32_e32 v3, v23, v3 ; 10060717 v_mul_f32_e32 v4, v24, v4 ; 10080918 v_mul_f32_e32 v6, s12, v6 ; 100C0C0C v_mul_f32_e32 v7, s13, v7 ; 100E0E0D v_mul_f32_e32 v8, s14, v8 ; 1010100E image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[28:35], s[24:27] ; F0800700 00C71609 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v9, s15, v22 ; 10122C0F v_mul_f32_e32 v10, s4, v23 ; 10142E04 v_mul_f32_e32 v22, s5, v24 ; 102C3005 v_mad_f32 v2, v5, v2, v16 ; D2820002 04420505 v_mad_f32 v3, v5, v3, v17 ; D2820003 04460705 v_mad_f32 v4, v5, v4, v18 ; D2820004 044A0905 v_mov_b32_e32 v5, 0x3f13cd3a ; 7E0A02FF 3F13CD3A v_mad_f32 v16, v0, v5, 0 ; D2820010 02020B00 v_mov_b32_e32 v17, 0x3f5105ec ; 7E2202FF 3F5105EC v_mad_f32 v16, v15, v17, v16 ; D2820010 0442230F v_mov_b32_e32 v18, 0xbf3504f3 ; 7E2402FF BF3504F3 v_mul_f32_e32 v23, v18, v14 ; 102E1D12 v_mov_b32_e32 v24, 0xbed105ec ; 7E3002FF BED105EC v_mad_f32 v23, v15, v24, v23 ; D2820017 045E310F v_mul_f32_e32 v15, v24, v15 ; 101E1F18 v_mad_f32 v23, v0, v5, v23 ; D2820017 045E0B00 v_mad_f32 v0, v0, v5, v15 ; D2820000 043E0B00 v_mad_f32 v15, v20, v5, 0 ; D282000F 02020B14 v_mad_f32 v15, v19, v17, v15 ; D282000F 043E2313 v_mul_f32_e32 v17, v18, v1 ; 10220312 v_mad_f32 v17, v19, v24, v17 ; D2820011 04463113 v_mul_f32_e32 v18, v24, v19 ; 10242718 v_mad_f32 v17, v20, v5, v17 ; D2820011 04460B14 v_mad_f32 v5, v20, v5, v18 ; D2820005 044A0B14 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s5, s[0:3], 0x13 ; C2028113 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_buffer_load_dword s10, s[0:3], 0x11 ; C2050111 s_buffer_load_dword s11, s[0:3], 0x2a ; C205812A s_buffer_load_dword s12, s[0:3], 0x2c ; C206012C s_buffer_load_dword s13, s[0:3], 0x2d ; C206812D s_buffer_load_dword s0, s[0:3], 0x2e ; C200012E v_mov_b32_e32 v18, 0x3f3504f3 ; 7E2402FF 3F3504F3 v_mad_f32 v0, v14, v18, v0 ; D2820000 0402250E v_mad_f32 v1, v1, v18, v5 ; D2820001 04162501 v_add_f32_e64 v5, 0, v16 clamp ; D2060805 00022080 v_add_f32_e64 v14, 0, v23 clamp ; D206080E 00022E80 v_max_f32_e32 v5, 0x358637bd, v5 ; 200A0AFF 358637BD v_max_f32_e32 v14, 0x358637bd, v14 ; 201C1CFF 358637BD v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_log_f32_e64 v14, |v14| ; D34E010E 0000010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v16, 1.0, s11 ; D2060010 000016F2 v_mov_b32_e32 v18, 0xff7fffff ; 7E2402FF FF7FFFFF v_max_f32_e32 v5, v18, v5 ; 200A0B12 v_max_f32_e32 v14, v18, v14 ; 201C1D12 v_mul_f32_e32 v5, v16, v5 ; 100A0B10 v_mul_f32_e32 v14, v16, v14 ; 101C1D10 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mad_f32 v5, v10, v14, v5 ; D2820005 04161D0A v_add_f32_e64 v14, 0, v15 clamp ; D206080E 00021E80 v_mul_f32_e32 v14, v14, v14 ; 101C1D0E v_max_f32_e32 v14, 0x358637bd, v14 ; 201C1CFF 358637BD v_mul_f32_e32 v9, v14, v9 ; 1012130E v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_add_f32_e64 v14, 0, v17 clamp ; D206080E 00022280 v_mul_f32_e32 v14, v14, v14 ; 101C1D0E v_max_f32_e32 v14, 0x358637bd, v14 ; 201C1CFF 358637BD v_mad_f32 v9, v10, v14, v9 ; D2820009 04261D0A v_max_f32_e32 v0, v18, v0 ; 20000112 v_mul_f32_e32 v0, v16, v0 ; 10000110 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v0, v22, v0, v5 ; D2820000 04160116 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_max_f32_e32 v1, 0x358637bd, v1 ; 200202FF 358637BD v_mad_f32 v1, v22, v1, v9 ; D2820001 04260316 v_sub_f32_e64 v5, 1.0, s6 ; D2080005 00000CF2 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_sub_f32_e64 v5, 1.0, s7 ; D2080005 00000EF2 v_mul_f32_e32 v3, v5, v3 ; 10060705 v_sub_f32_e64 v5, 1.0, s8 ; D2080005 000010F2 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_mov_b32_e32 v5, s9 ; 7E0A0209 v_mad_f32 v2, s5, v2, v5 ; D2820002 04160405 v_mov_b32_e32 v5, s10 ; 7E0A020A v_mad_f32 v3, s5, v3, v5 ; D2820003 04160605 v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v4, s5, v4, v5 ; D2820004 04160805 v_mul_f32_e32 v5, v11, v6 ; 100A0D0B v_mad_f32 v5, v0, v5, s6 ; D2820005 001A0B00 v_mul_f32_e32 v6, v6, v2 ; 100C0506 v_mad_f32 v5, v6, v1, v5 ; D2820005 04160306 v_mul_f32_e32 v6, v12, v7 ; 100C0F0C v_mad_f32 v6, v0, v6, s7 ; D2820006 001E0D00 v_mul_f32_e32 v7, v7, v3 ; 100E0707 v_mad_f32 v6, v7, v1, v6 ; D2820006 041A0307 v_mul_f32_e32 v7, v21, v8 ; 100E1115 v_mad_f32 v0, v0, v7, s8 ; D2820000 00220F00 v_mul_f32_e32 v7, v8, v4 ; 100E0908 v_mad_f32 v0, v7, v1, v0 ; D2820000 04020307 v_mad_f32 v1, v2, s12, v5 ; D2820001 04141902 v_mad_f32 v2, v3, s13, v6 ; D2820002 04181B03 v_mad_f32 v0, v4, s0, v0 ; D2820000 04000104 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, 0x3b800000, v13 ; 10041AFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 32 Code Size: 1572 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..9] DCL TEMP[0], LOCAL DCL TEMP[1..5] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.8165, 0.5774, 0.0000, 0.0039} IMM[2] FLT32 { -0.7071, -0.4082, 0.5774, 0.7071} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: TEX TEMP[2], IN[1], SAMP[2], 2D 5: MAD TEMP[2].xyz, TEMP[2], IMM[0].xxxx, IMM[0].yyyy 6: DP3 TEMP[0].x, TEMP[2], TEMP[2] 7: RSQ TEMP[0].x, TEMP[0].xxxx 8: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 9: MUL TEMP[3].xyz, TEMP[2], TEMP[0].xxxx 10: DP3 TEMP[1].w, TEMP[3], TEMP[1] 11: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[3] 12: MAD TEMP[1].xyz, TEMP[2], IMM[0].xxxx, -TEMP[1] 13: DP2 TEMP[0].x, TEMP[1].yzzw, IMM[1] 14: ADD_SAT TEMP[2].x, IMM[1].zzzz, TEMP[0].xxxx 15: DP3_SAT TEMP[2].y, TEMP[1], IMM[2] 16: DP3_SAT TEMP[2].z, TEMP[1].yzxw, IMM[2].yzww 17: MAX TEMP[1].xyz, TEMP[2], IMM[0].wwww 18: LG2 TEMP[0].x, |TEMP[1].xxxx| 19: MAX TEMP[2].x, IMM[3].yyyy, TEMP[0].xxxx 20: LG2 TEMP[0].x, |TEMP[1].yyyy| 21: MAX TEMP[2].y, IMM[3].yyyy, TEMP[0].xxxx 22: LG2 TEMP[0].x, |TEMP[1].zzzz| 23: MAX TEMP[2].z, IMM[3].yyyy, TEMP[0].xxxx 24: MOV TEMP[1].z, IMM[0].zzzz 25: ADD TEMP[1].x, TEMP[1].zzzz, CONST[8].zzzz 26: MUL TEMP[1].xyw, TEMP[2].xyzz, TEMP[1].xxxx 27: EX2 TEMP[2].x, TEMP[1].xxxx 28: EX2 TEMP[2].y, TEMP[1].yyyy 29: EX2 TEMP[2].z, TEMP[1].wwww 30: TEX TEMP[4], IN[0], SAMP[1], 2D 31: MUL TEMP[1].xyw, TEMP[4].xyzz, CONST[7].xyzz 32: DP3 TEMP[2].x, TEMP[1].xyww, TEMP[2] 33: TEX TEMP[4], IN[1], SAMP[4], 2D 34: MAD TEMP[2].yzw, TEMP[4].xxyz, CONST[5].wwww, CONST[5].xxyz 35: TEX TEMP[4], IN[0], SAMP[0], 2D 36: MUL TEMP[4].xyz, TEMP[4], CONST[6] 37: MUL TEMP[2].yzw, TEMP[2], TEMP[4].xxyz 38: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[2].yzww 39: DP2 TEMP[0].x, TEMP[3].yzzw, IMM[1] 40: ADD_SAT TEMP[5].x, IMM[1].zzzz, TEMP[0].xxxx 41: DP3_SAT TEMP[5].y, TEMP[3], IMM[2] 42: DP3_SAT TEMP[5].z, TEMP[3].yzxw, IMM[2].yzww 43: MUL TEMP[3].xyz, TEMP[5], TEMP[5] 44: MAX TEMP[5].xyz, TEMP[3], IMM[0].wwww 45: DP3 TEMP[1].x, TEMP[1].xyww, TEMP[5] 46: ADD TEMP[1].yzw, TEMP[1].zzzz, -CONST[0].xxyz 47: TEX TEMP[3], IN[1], SAMP[3], 2D 48: MUL TEMP[1].yzw, TEMP[1], TEMP[3].xxyz 49: MAD TEMP[1].yzw, TEMP[1], CONST[4].wwww, CONST[4].xxyz 50: MUL TEMP[3].xyz, TEMP[4], TEMP[1].yzww 51: MAD TEMP[2].xyz, TEMP[3], TEMP[1].xxxx, TEMP[2] 52: ADD TEMP[2].xyz, TEMP[2], CONST[0] 53: MAD OUT[0].xyz, TEMP[1].yzww, CONST[9], TEMP[2] 54: MUL OUT[0].w, IMM[1].wwww, IN[2].wwww 55: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %45 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %46 = load <32 x i8>, <32 x i8> addrspace(2)* %45, align 32, !tbaa !0 %47 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %50 = bitcast <8 x i32> addrspace(2)* %49 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %53 = bitcast <4 x i32> addrspace(2)* %52 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %81 = fmul float %78, %78 %82 = fmul float %79, %79 %83 = fadd float %82, %81 %84 = fmul float %80, %80 %85 = fadd float %83, %84 %86 = call float @llvm.AMDGPU.rsq.clamped.f32(float %85) %87 = call float @llvm.minnum.f32(float %86, float 0x47EFFFFFE0000000) %88 = fmul float %78, %87 %89 = fmul float %79, %87 %90 = fmul float %80, %87 %91 = bitcast float %75 to i32 %92 = bitcast float %76 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %57, <16 x i8> %60, i32 2) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = fmul float %96, 2.000000e+00 %100 = fadd float %99, -1.000000e+00 %101 = fmul float %97, 2.000000e+00 %102 = fadd float %101, -1.000000e+00 %103 = fmul float %98, 2.000000e+00 %104 = fadd float %103, -1.000000e+00 %105 = fmul float %100, %100 %106 = fmul float %102, %102 %107 = fadd float %106, %105 %108 = fmul float %104, %104 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) %111 = call float @llvm.minnum.f32(float %110, float 0x47EFFFFFE0000000) %112 = fmul float %100, %111 %113 = fmul float %102, %111 %114 = fmul float %104, %111 %115 = fmul float %112, %88 %116 = fmul float %113, %89 %117 = fadd float %116, %115 %118 = fmul float %114, %90 %119 = fadd float %117, %118 %120 = fmul float %119, %112 %121 = fmul float %119, %113 %122 = fmul float %119, %114 %123 = fmul float %120, 2.000000e+00 %124 = fsub float %123, %88 %125 = fmul float %121, 2.000000e+00 %126 = fsub float %125, %89 %127 = fmul float %122, 2.000000e+00 %128 = fsub float %127, %90 %129 = fmul float %126, 0x3FEA20BD80000000 %130 = fmul float %128, 0x3FE279A740000000 %131 = fadd float %129, %130 %132 = fadd float %131, 0.000000e+00 %133 = call float @llvm.AMDIL.clamp.(float %132, float 0.000000e+00, float 1.000000e+00) %134 = fmul float %124, 0xBFE6A09E60000000 %135 = fmul float %126, 0xBFDA20BD80000000 %136 = fadd float %135, %134 %137 = fmul float %128, 0x3FE279A740000000 %138 = fadd float %136, %137 %139 = call float @llvm.AMDIL.clamp.(float %138, float 0.000000e+00, float 1.000000e+00) %140 = fmul float %126, 0xBFDA20BD80000000 %141 = fmul float %128, 0x3FE279A740000000 %142 = fadd float %141, %140 %143 = fmul float %124, 0x3FE6A09E60000000 %144 = fadd float %142, %143 %145 = call float @llvm.AMDIL.clamp.(float %144, float 0.000000e+00, float 1.000000e+00) %146 = call float @llvm.maxnum.f32(float %133, float 0x3EB0C6F7A0000000) %147 = call float @llvm.maxnum.f32(float %139, float 0x3EB0C6F7A0000000) %148 = call float @llvm.maxnum.f32(float %145, float 0x3EB0C6F7A0000000) %149 = call float @fabs(float %146) %150 = call float @llvm.log2.f32(float %149) %151 = call float @llvm.maxnum.f32(float %150, float 0xC7EFFFFFE0000000) %152 = call float @fabs(float %147) %153 = call float @llvm.log2.f32(float %152) %154 = call float @llvm.maxnum.f32(float %153, float 0xC7EFFFFFE0000000) %155 = call float @fabs(float %148) %156 = call float @llvm.log2.f32(float %155) %157 = call float @llvm.maxnum.f32(float %156, float 0xC7EFFFFFE0000000) %158 = fadd float %41, 1.000000e+00 %159 = fmul float %151, %158 %160 = fmul float %154, %158 %161 = fmul float %157, %158 %162 = call float @llvm.AMDIL.exp.(float %159) %163 = call float @llvm.AMDIL.exp.(float %160) %164 = call float @llvm.AMDIL.exp.(float %161) %165 = bitcast float %73 to i32 %166 = bitcast float %74 to i32 %167 = insertelement <2 x i32> undef, i32 %165, i32 0 %168 = insertelement <2 x i32> %167, i32 %166, i32 1 %169 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %168, <32 x i8> %51, <16 x i8> %54, i32 2) %170 = extractelement <4 x float> %169, i32 0 %171 = extractelement <4 x float> %169, i32 1 %172 = extractelement <4 x float> %169, i32 2 %173 = fmul float %170, %38 %174 = fmul float %171, %39 %175 = fmul float %172, %40 %176 = fmul float %173, %162 %177 = fmul float %174, %163 %178 = fadd float %177, %176 %179 = fmul float %175, %164 %180 = fadd float %178, %179 %181 = bitcast float %75 to i32 %182 = bitcast float %76 to i32 %183 = insertelement <2 x i32> undef, i32 %181, i32 0 %184 = insertelement <2 x i32> %183, i32 %182, i32 1 %185 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %184, <32 x i8> %69, <16 x i8> %72, i32 2) %186 = extractelement <4 x float> %185, i32 0 %187 = extractelement <4 x float> %185, i32 1 %188 = extractelement <4 x float> %185, i32 2 %189 = fmul float %186, %34 %190 = fadd float %189, %31 %191 = fmul float %187, %34 %192 = fadd float %191, %32 %193 = fmul float %188, %34 %194 = fadd float %193, %33 %195 = bitcast float %73 to i32 %196 = bitcast float %74 to i32 %197 = insertelement <2 x i32> undef, i32 %195, i32 0 %198 = insertelement <2 x i32> %197, i32 %196, i32 1 %199 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %198, <32 x i8> %46, <16 x i8> %48, i32 2) %200 = extractelement <4 x float> %199, i32 0 %201 = extractelement <4 x float> %199, i32 1 %202 = extractelement <4 x float> %199, i32 2 %203 = fmul float %200, %35 %204 = fmul float %201, %36 %205 = fmul float %202, %37 %206 = fmul float %190, %203 %207 = fmul float %192, %204 %208 = fmul float %194, %205 %209 = fmul float %180, %206 %210 = fmul float %180, %207 %211 = fmul float %180, %208 %212 = fmul float %113, 0x3FEA20BD80000000 %213 = fmul float %114, 0x3FE279A740000000 %214 = fadd float %212, %213 %215 = fadd float %214, 0.000000e+00 %216 = call float @llvm.AMDIL.clamp.(float %215, float 0.000000e+00, float 1.000000e+00) %217 = fmul float %112, 0xBFE6A09E60000000 %218 = fmul float %113, 0xBFDA20BD80000000 %219 = fadd float %218, %217 %220 = fmul float %114, 0x3FE279A740000000 %221 = fadd float %219, %220 %222 = call float @llvm.AMDIL.clamp.(float %221, float 0.000000e+00, float 1.000000e+00) %223 = fmul float %113, 0xBFDA20BD80000000 %224 = fmul float %114, 0x3FE279A740000000 %225 = fadd float %224, %223 %226 = fmul float %112, 0x3FE6A09E60000000 %227 = fadd float %225, %226 %228 = call float @llvm.AMDIL.clamp.(float %227, float 0.000000e+00, float 1.000000e+00) %229 = fmul float %216, %216 %230 = fmul float %222, %222 %231 = fmul float %228, %228 %232 = call float @llvm.maxnum.f32(float %229, float 0x3EB0C6F7A0000000) %233 = call float @llvm.maxnum.f32(float %230, float 0x3EB0C6F7A0000000) %234 = call float @llvm.maxnum.f32(float %231, float 0x3EB0C6F7A0000000) %235 = fmul float %173, %232 %236 = fmul float %174, %233 %237 = fadd float %236, %235 %238 = fmul float %175, %234 %239 = fadd float %237, %238 %240 = fsub float 1.000000e+00, %24 %241 = fsub float 1.000000e+00, %25 %242 = fsub float 1.000000e+00, %26 %243 = bitcast float %75 to i32 %244 = bitcast float %76 to i32 %245 = insertelement <2 x i32> undef, i32 %243, i32 0 %246 = insertelement <2 x i32> %245, i32 %244, i32 1 %247 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %246, <32 x i8> %63, <16 x i8> %66, i32 2) %248 = extractelement <4 x float> %247, i32 0 %249 = extractelement <4 x float> %247, i32 1 %250 = extractelement <4 x float> %247, i32 2 %251 = fmul float %240, %248 %252 = fmul float %241, %249 %253 = fmul float %242, %250 %254 = fmul float %251, %30 %255 = fadd float %254, %27 %256 = fmul float %252, %30 %257 = fadd float %256, %28 %258 = fmul float %253, %30 %259 = fadd float %258, %29 %260 = fmul float %203, %255 %261 = fmul float %204, %257 %262 = fmul float %205, %259 %263 = fmul float %260, %239 %264 = fadd float %263, %209 %265 = fmul float %261, %239 %266 = fadd float %265, %210 %267 = fmul float %262, %239 %268 = fadd float %267, %211 %269 = fadd float %264, %24 %270 = fadd float %266, %25 %271 = fadd float %268, %26 %272 = fmul float %255, %42 %273 = fadd float %272, %269 %274 = fmul float %257, %43 %275 = fadd float %274, %270 %276 = fmul float %259, %44 %277 = fadd float %276, %271 %278 = fmul float %77, 3.906250e-03 %279 = call i32 @llvm.SI.packf16(float %273, float %275) %280 = bitcast i32 %279 to float %281 = call i32 @llvm.SI.packf16(float %277, float %278) %282 = bitcast i32 %281 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %280, float %282, float %280, float %282) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x3f13cd3a ; 7E0402FF 3F13CD3A v_mov_b32_e32 v3, 0x3f5105ec ; 7E0602FF 3F5105EC v_mov_b32_e32 v4, 0xbf3504f3 ; 7E0802FF BF3504F3 v_mov_b32_e32 v5, 0xbed105ec ; 7E0A02FF BED105EC v_mov_b32_e32 v6, 0x3f3504f3 ; 7E0C02FF 3F3504F3 v_mov_b32_e32 v7, 0xff7fffff ; 7E0E02FF FF7FFFFF v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 v_interp_p1_f32 v10, v0, 0, 1, [m0] ; C8280400 v_interp_p2_f32 v10, [v10], v1, 0, 1, [m0] ; C8290401 v_interp_p1_f32 v11, v0, 1, 1, [m0] ; C82C0500 v_interp_p2_f32 v11, [v11], v1, 1, 1, [m0] ; C82D0501 v_interp_p1_f32 v12, v0, 3, 2, [m0] ; C8300B00 v_interp_p2_f32 v12, [v12], v1, 3, 2, [m0] ; C8310B01 v_mul_f32_e32 v12, 0x3b800000, v12 ; 101818FF 3B800000 v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00 v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01 v_interp_p1_f32 v14, v0, 1, 3, [m0] ; C8380D00 v_interp_p2_f32 v14, [v14], v1, 1, 3, [m0] ; C8390D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[52:55], s[4:5], 0x4 ; C09A0504 s_load_dwordx4 s[56:59], s[4:5], 0x8 ; C09C0508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[40:43], s[4:5], 0x10 ; C0940510 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx8 s[60:67], s[6:7], 0x8 ; C0DE0708 s_load_dwordx8 s[68:75], s[6:7], 0x10 ; C0E20710 s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[12:15], 0x0 ; C2010D00 s_buffer_load_dword s1, s[12:15], 0x1 ; C2008D01 s_buffer_load_dword s0, s[12:15], 0x2 ; C2000D02 s_buffer_load_dword s8, s[12:15], 0x10 ; C2040D10 s_buffer_load_dword s5, s[12:15], 0x11 ; C2028D11 s_buffer_load_dword s10, s[12:15], 0x17 ; C2050D17 s_buffer_load_dword s11, s[12:15], 0x18 ; C2058D18 s_buffer_load_dword s76, s[12:15], 0x19 ; C2260D19 s_buffer_load_dword s77, s[12:15], 0x1a ; C2268D1A s_buffer_load_dword s78, s[12:15], 0x1c ; C2270D1C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v1, 1.0, s2 ; D2080001 000004F2 v_sub_f32_e64 v15, 1.0, s1 ; D208000F 000002F2 v_sub_f32_e64 v16, 1.0, s0 ; D2080010 000000F2 s_buffer_load_dword s79, s[12:15], 0x1d ; C2278D1D s_buffer_load_dword s80, s[12:15], 0x1e ; C2280D1E s_buffer_load_dword s9, s[12:15], 0x22 ; C2048D22 s_buffer_load_dword s4, s[12:15], 0x24 ; C2020D24 s_buffer_load_dword s3, s[12:15], 0x25 ; C2018D25 s_buffer_load_dword s81, s[12:15], 0x14 ; C2288D14 s_buffer_load_dword s82, s[12:15], 0x15 ; C2290D15 s_buffer_load_dword s83, s[12:15], 0x16 ; C2298D16 s_load_dwordx8 s[44:51], s[6:7], 0x20 ; C0D60720 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[68:75], s[56:59] ; F0800700 01D1110A image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[60:67], s[52:55] ; F0800700 01AF1408 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mad_f32 v17, 2.0, v17, -1.0 ; D2820011 03CE22F4 v_mov_b32_e32 v23, s81 ; 7E2E0251 v_mov_b32_e32 v24, s82 ; 7E300252 v_mov_b32_e32 v25, s83 ; 7E320253 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[44:51], s[40:43] ; F0800700 014B1A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, s10, v26, v23 ; D2820017 045E340A v_mad_f32 v24, s10, v27, v24 ; D2820018 0462360A v_mad_f32 v25, s10, v28, v25 ; D2820019 0466380A image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[32:39], s[28:31] ; F0800700 00E81A08 image_sample v[8:10], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[20:27], s[16:19] ; F0800700 0085080A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v8, v9, v15 ; 10101F09 v_mul_f32_e32 v9, v10, v16 ; 1012210A v_mad_f32 v10, 2.0, v18, -1.0 ; D282000A 03CE24F4 v_mad_f32 v11, 2.0, v19, -1.0 ; D282000B 03CE26F4 v_mul_f32_e32 v15, s78, v20 ; 101E284E v_mul_f32_e32 v16, s79, v21 ; 10202A4F v_mul_f32_e32 v18, s80, v22 ; 10242C50 v_mul_f32_e32 v19, s11, v26 ; 1026340B v_mul_f32_e32 v20, s76, v27 ; 1028364C v_mul_f32_e32 v21, s77, v28 ; 102A384D v_mul_f32_e32 v22, v17, v17 ; 102C2311 v_mad_f32 v22, v10, v10, v22 ; D2820016 045A150A v_mad_f32 v22, v11, v11, v22 ; D2820016 045A170B v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v26, v13, v13 ; 10341B0D v_mad_f32 v26, v14, v14, v26 ; D282001A 046A1D0E v_mad_f32 v26, v0, v0, v26 ; D282001A 046A0100 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_min_f32_e32 v22, 0x7f7fffff, v22 ; 1E2C2CFF 7F7FFFFF v_mul_f32_e32 v17, v22, v17 ; 10222316 v_mul_f32_e32 v10, v22, v10 ; 10141516 v_mul_f32_e32 v11, v22, v11 ; 10161716 v_min_f32_e32 v22, 0x7f7fffff, v26 ; 1E2C34FF 7F7FFFFF v_mul_f32_e32 v26, v22, v13 ; 10341B16 v_mul_f32_e32 v26, v26, v17 ; 1034231A v_mul_f32_e32 v27, v22, v14 ; 10361D16 v_mad_f32 v26, v10, v27, v26 ; D282001A 046A370A v_mul_f32_e32 v27, v22, v0 ; 10360116 v_mad_f32 v26, v11, v27, v26 ; D282001A 046A370B v_mul_f32_e32 v27, v17, v26 ; 10363511 v_mad_f32 v27, v26, v17, v27 ; D282001B 046E231A v_mad_f32 v13, -v13, v22, v27 ; D282000D 246E2D0D v_mul_f32_e32 v27, v10, v26 ; 1036350A v_mad_f32 v27, v26, v10, v27 ; D282001B 046E151A v_mad_f32 v14, -v14, v22, v27 ; D282000E 246E2D0E v_mul_f32_e32 v27, v11, v26 ; 1036350B v_mad_f32 v26, v26, v11, v27 ; D282001A 046E171A v_mad_f32 v0, -v0, v22, v26 ; D2820000 246A2D00 v_mad_f32 v22, v0, v2, 0 ; D2820016 02020500 v_mad_f32 v22, v14, v3, v22 ; D2820016 045A070E v_mul_f32_e32 v26, v4, v13 ; 10341B04 v_mad_f32 v26, v14, v5, v26 ; D282001A 046A0B0E v_mul_f32_e32 v14, v5, v14 ; 101C1D05 v_mad_f32 v26, v0, v2, v26 ; D282001A 046A0500 v_mad_f32 v0, v0, v2, v14 ; D2820000 043A0500 v_mad_f32 v14, v11, v2, 0 ; D282000E 0202050B v_mad_f32 v3, v10, v3, v14 ; D2820003 043A070A v_mul_f32_e32 v4, v4, v17 ; 10082304 v_mad_f32 v4, v10, v5, v4 ; D2820004 04120B0A v_mul_f32_e32 v5, v5, v10 ; 100A1505 v_mad_f32 v4, v11, v2, v4 ; D2820004 0412050B v_mad_f32 v2, v11, v2, v5 ; D2820002 0416050B v_mad_f32 v0, v13, v6, v0 ; D2820000 04020D0D s_buffer_load_dword s6, s[12:15], 0x12 ; C2030D12 s_buffer_load_dword s7, s[12:15], 0x13 ; C2038D13 s_buffer_load_dword s10, s[12:15], 0x26 ; C2050D26 v_add_f32_e64 v5, 0, v22 clamp ; D2060805 00022C80 v_add_f32_e64 v10, 0, v26 clamp ; D206080A 00023480 v_max_f32_e32 v5, 0x358637bd, v5 ; 200A0AFF 358637BD v_max_f32_e32 v10, 0x358637bd, v10 ; 201414FF 358637BD v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_log_f32_e64 v10, |v10| ; D34E010A 0000010A v_mad_f32 v2, v17, v6, v2 ; D2820002 040A0D11 v_add_f32_e64 v6, 1.0, s9 ; D2060006 000012F2 v_max_f32_e32 v5, v7, v5 ; 200A0B07 v_max_f32_e32 v10, v7, v10 ; 20141507 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v10, v6, v10 ; 10141506 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_exp_f32_e32 v10, v10 ; 7E144B0A v_mul_f32_e32 v5, v5, v15 ; 100A1F05 v_mad_f32 v5, v16, v10, v5 ; D2820005 04161510 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_max_f32_e32 v3, 0x358637bd, v3 ; 200606FF 358637BD v_mul_f32_e32 v3, v3, v15 ; 10061F03 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_max_f32_e32 v4, 0x358637bd, v4 ; 200808FF 358637BD v_mad_f32 v3, v16, v4, v3 ; D2820003 040E0910 v_max_f32_e32 v0, v7, v0 ; 20000107 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v0, v18, v0, v5 ; D2820000 04160112 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_max_f32_e32 v2, 0x358637bd, v2 ; 200404FF 358637BD v_mad_f32 v2, v18, v2, v3 ; D2820002 040E0512 v_mov_b32_e32 v3, s8 ; 7E060208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s7, v1, v3 ; D2820001 040E0207 v_mov_b32_e32 v3, s5 ; 7E060205 v_mad_f32 v3, s7, v8, v3 ; D2820003 040E1007 v_mov_b32_e32 v4, s6 ; 7E080206 v_mad_f32 v4, s7, v9, v4 ; D2820004 04121207 v_mul_f32_e32 v5, v19, v23 ; 100A2F13 v_mul_f32_e32 v6, v20, v24 ; 100C3114 v_mul_f32_e32 v7, v21, v25 ; 100E3315 v_mul_f32_e32 v8, v1, v19 ; 10102701 v_mul_f32_e32 v9, v3, v20 ; 10122903 v_mul_f32_e32 v10, v4, v21 ; 10142B04 v_mad_f32 v5, v0, v5, s2 ; D2820005 000A0B00 v_mad_f32 v5, v8, v2, v5 ; D2820005 04160508 v_mad_f32 v6, v0, v6, s1 ; D2820006 00060D00 v_mad_f32 v6, v9, v2, v6 ; D2820006 041A0509 v_mad_f32 v0, v0, v7, s0 ; D2820000 00020F00 v_mad_f32 v0, v10, v2, v0 ; D2820000 0402050A v_mad_f32 v1, v1, s4, v5 ; D2820001 04140901 v_mad_f32 v2, v3, s3, v6 ; D2820002 04180703 v_mad_f32 v0, v4, s10, v0 ; D2820000 04001504 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v0, v12 ; 5E001900 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 32 Code Size: 1088 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[1], PERSPECTIVE DCL IN[4], TEXCOORD[2], PERSPECTIVE DCL IN[5], TEXCOORD[5], PERSPECTIVE DCL IN[6], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..11] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..7] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.4000} IMM[1] FLT32 { 0.8165, 0.5774, 0.0000, 0.0000} IMM[2] FLT32 { 0.0039, 0.0000, 340282346638528859811704183484516925440.0000, -0.4082} IMM[3] FLT32 { -0.7071, -0.4082, 0.5774, 0.7071} 0: TEX TEMP[0], IN[3], SAMP[2], 2D 1: MAD TEMP[0].xyz, TEMP[0], IMM[0].xxxx, IMM[0].yyyy 2: DP3 TEMP[1].x, TEMP[0], TEMP[0] 3: RSQ TEMP[1].x, TEMP[1].xxxx 4: MIN TEMP[1].x, IMM[2].zzzz, TEMP[1].xxxx 5: MUL TEMP[2].xyz, TEMP[0], TEMP[1].xxxx 6: DP2 TEMP[1].x, TEMP[2].yzzw, IMM[1] 7: ADD_SAT TEMP[0].x, IMM[1].zzzz, TEMP[1].xxxx 8: DP3_SAT TEMP[0].y, TEMP[2], IMM[3] 9: DP3_SAT TEMP[0].z, TEMP[2].yzxw, IMM[3].yzww 10: MUL TEMP[0].xyz, TEMP[0], TEMP[0] 11: MAX TEMP[3].xyz, TEMP[0], IMM[1].wwww 12: TEX TEMP[0], IN[2], SAMP[1], 2D 13: MUL TEMP[0].xyz, TEMP[0], CONST[7] 14: DP3 TEMP[0].w, TEMP[0], TEMP[3] 15: DP3 TEMP[1].x, IN[1], IN[1] 16: RSQ TEMP[1].x, TEMP[1].xxxx 17: MIN TEMP[1].x, IMM[2].zzzz, TEMP[1].xxxx 18: MUL TEMP[3].xyz, IN[1], TEMP[1].xxxx 19: DP3 TEMP[1].x, IN[0], IN[0] 20: RSQ TEMP[1].x, TEMP[1].xxxx 21: MIN TEMP[1].x, IMM[2].zzzz, TEMP[1].xxxx 22: MUL TEMP[4].xyz, IN[0], TEMP[1].xxxx 23: MUL TEMP[5].xyz, TEMP[3].zxyw, TEMP[4].yzxw 24: MAD TEMP[5].xyz, TEMP[3].yzxw, TEMP[4].zxyw, -TEMP[5] 25: MUL TEMP[5].xyz, TEMP[5], IN[1].wwww 26: DP3 TEMP[1].x, IN[6], IN[6] 27: RSQ TEMP[1].x, TEMP[1].xxxx 28: MIN TEMP[1].x, IMM[2].zzzz, TEMP[1].xxxx 29: MUL TEMP[6].xyz, IN[6], TEMP[1].xxxx 30: DP3 TEMP[2].w, TEMP[2], TEMP[6] 31: MUL TEMP[2].xyz, TEMP[2].wwww, TEMP[2] 32: MAD TEMP[2].xyz, TEMP[2], IMM[0].xxxx, -TEMP[6] 33: DP3 TEMP[5].y, TEMP[5], TEMP[2] 34: DP3 TEMP[5].x, TEMP[4], TEMP[2] 35: DP3 TEMP[5].z, TEMP[3], TEMP[2] 36: TEX TEMP[3], TEMP[5], SAMP[9], CUBE 37: MUL TEMP[4].xyz, CONST[9], CONST[9].wwww 38: MUL TEMP[3].xyz, TEMP[3], TEMP[4] 39: TEX TEMP[4], IN[3], SAMP[7], 2D 40: MUL_SAT TEMP[3].xyz, TEMP[3], TEMP[4] 41: TEX TEMP[4], IN[3], SAMP[8], 2D 42: ADD TEMP[3].xyz, TEMP[3], TEMP[4] 43: MOV TEMP[4].zw, IMM[0] 44: ADD TEMP[4].xyz, TEMP[4].zzzz, -CONST[0] 45: MUL TEMP[3].xyz, TEMP[3], TEMP[4] 46: MAD TEMP[3].xyz, TEMP[3], CONST[4].wwww, CONST[4] 47: TEX TEMP[5], IN[2], SAMP[0], 2D 48: MUL TEMP[4].xyz, TEMP[5], CONST[6] 49: MUL TEMP[5].xyz, TEMP[3], TEMP[4] 50: MUL TEMP[4].xyz, TEMP[4], CONST[5] 51: DP2 TEMP[1].x, TEMP[2].yzzw, IMM[1] 52: ADD_SAT TEMP[7].x, IMM[1].zzzz, TEMP[1].xxxx 53: DP3_SAT TEMP[7].y, TEMP[2], IMM[3] 54: DP3_SAT TEMP[7].z, TEMP[2].yzxw, IMM[3].yzww 55: MAX TEMP[2].xyz, TEMP[7], IMM[1].wwww 56: MUL TEMP[2].xyz, TEMP[2], TEMP[2] 57: MUL TEMP[2].xyz, TEMP[2], TEMP[2] 58: MUL TEMP[2].xyz, TEMP[2], TEMP[2] 59: MUL TEMP[2].xyz, TEMP[2], TEMP[2] 60: DP3 TEMP[0].x, TEMP[0], TEMP[2] 61: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[4] 62: MAD TEMP[0].xyz, TEMP[5], TEMP[0].wwww, TEMP[0] 63: MUL TEMP[2], TEMP[4].wwww, CONST[10].xxyy 64: MAD TEMP[2], TEMP[2], TEMP[6].xyxy, IN[4].xyxy 65: TEX TEMP[4], TEMP[2], SAMP[4], 2D 66: TEX TEMP[5], TEMP[2].zwzw, SAMP[5], 2D 67: TEX TEMP[2], TEMP[2].zwzw, SAMP[6], 2D 68: LRP TEMP[6].xyz, TEMP[2].xxxx, TEMP[5], TEMP[4] 69: TEX TEMP[2], IN[3], SAMP[3], 2D 70: MUL TEMP[2].xyz, TEMP[6], TEMP[2] 71: MUL TEMP[4].xyz, CONST[8], CONST[8].wwww 72: MAD TEMP[2].xyz, TEMP[2], TEMP[4], CONST[0] 73: ADD TEMP[0].xyz, TEMP[0], TEMP[2] 74: MAD OUT[0].xyz, TEMP[3], CONST[11], TEMP[0] 75: MUL OUT[0].w, IMM[2].xxxx, IN[5].wwww 76: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %76 = bitcast <8 x i32> addrspace(2)* %75 to <32 x i8> addrspace(2)* %77 = load <32 x i8>, <32 x i8> addrspace(2)* %76, align 32, !tbaa !0 %78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %79 = bitcast <4 x i32> addrspace(2)* %78 to <16 x i8> addrspace(2)* %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %82 = bitcast <8 x i32> addrspace(2)* %81 to <32 x i8> addrspace(2)* %83 = load <32 x i8>, <32 x i8> addrspace(2)* %82, align 32, !tbaa !0 %84 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %85 = bitcast <4 x i32> addrspace(2)* %84 to <16 x i8> addrspace(2)* %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)* %89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)* %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)* %95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0 %96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)* %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)* %101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0 %102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)* %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)* %107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0 %108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)* %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %128 = bitcast float %120 to i32 %129 = bitcast float %121 to i32 %130 = insertelement <2 x i32> undef, i32 %128, i32 0 %131 = insertelement <2 x i32> %130, i32 %129, i32 1 %132 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %131, <32 x i8> %65, <16 x i8> %68, i32 2) %133 = extractelement <4 x float> %132, i32 0 %134 = extractelement <4 x float> %132, i32 1 %135 = extractelement <4 x float> %132, i32 2 %136 = fmul float %133, 2.000000e+00 %137 = fadd float %136, -1.000000e+00 %138 = fmul float %134, 2.000000e+00 %139 = fadd float %138, -1.000000e+00 %140 = fmul float %135, 2.000000e+00 %141 = fadd float %140, -1.000000e+00 %142 = fmul float %137, %137 %143 = fmul float %139, %139 %144 = fadd float %143, %142 %145 = fmul float %141, %141 %146 = fadd float %144, %145 %147 = call float @llvm.AMDGPU.rsq.clamped.f32(float %146) %148 = call float @llvm.minnum.f32(float %147, float 0x47EFFFFFE0000000) %149 = fmul float %137, %148 %150 = fmul float %139, %148 %151 = fmul float %141, %148 %152 = fmul float %150, 0x3FEA20BD80000000 %153 = fmul float %151, 0x3FE279A740000000 %154 = fadd float %152, %153 %155 = fadd float %154, 0.000000e+00 %156 = call float @llvm.AMDIL.clamp.(float %155, float 0.000000e+00, float 1.000000e+00) %157 = fmul float %149, 0xBFE6A09E60000000 %158 = fmul float %150, 0xBFDA20BD80000000 %159 = fadd float %158, %157 %160 = fmul float %151, 0x3FE279A740000000 %161 = fadd float %159, %160 %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) %163 = fmul float %150, 0xBFDA20BD80000000 %164 = fmul float %151, 0x3FE279A740000000 %165 = fadd float %164, %163 %166 = fmul float %149, 0x3FE6A09E60000000 %167 = fadd float %165, %166 %168 = call float @llvm.AMDIL.clamp.(float %167, float 0.000000e+00, float 1.000000e+00) %169 = fmul float %156, %156 %170 = fmul float %162, %162 %171 = fmul float %168, %168 %172 = call float @llvm.maxnum.f32(float %169, float 0x3EB0C6F7A0000000) %173 = call float @llvm.maxnum.f32(float %170, float 0x3EB0C6F7A0000000) %174 = call float @llvm.maxnum.f32(float %171, float 0x3EB0C6F7A0000000) %175 = bitcast float %118 to i32 %176 = bitcast float %119 to i32 %177 = insertelement <2 x i32> undef, i32 %175, i32 0 %178 = insertelement <2 x i32> %177, i32 %176, i32 1 %179 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %178, <32 x i8> %59, <16 x i8> %62, i32 2) %180 = extractelement <4 x float> %179, i32 0 %181 = extractelement <4 x float> %179, i32 1 %182 = extractelement <4 x float> %179, i32 2 %183 = fmul float %180, %37 %184 = fmul float %181, %38 %185 = fmul float %182, %39 %186 = fmul float %183, %172 %187 = fmul float %184, %173 %188 = fadd float %187, %186 %189 = fmul float %185, %174 %190 = fadd float %188, %189 %191 = fmul float %114, %114 %192 = fmul float %115, %115 %193 = fadd float %192, %191 %194 = fmul float %116, %116 %195 = fadd float %193, %194 %196 = call float @llvm.AMDGPU.rsq.clamped.f32(float %195) %197 = call float @llvm.minnum.f32(float %196, float 0x47EFFFFFE0000000) %198 = fmul float %114, %197 %199 = fmul float %115, %197 %200 = fmul float %116, %197 %201 = fmul float %111, %111 %202 = fmul float %112, %112 %203 = fadd float %202, %201 %204 = fmul float %113, %113 %205 = fadd float %203, %204 %206 = call float @llvm.AMDGPU.rsq.clamped.f32(float %205) %207 = call float @llvm.minnum.f32(float %206, float 0x47EFFFFFE0000000) %208 = fmul float %111, %207 %209 = fmul float %112, %207 %210 = fmul float %113, %207 %211 = fmul float %200, %209 %212 = fmul float %198, %210 %213 = fmul float %199, %208 %214 = fmul float %199, %210 %215 = fsub float %214, %211 %216 = fmul float %200, %208 %217 = fsub float %216, %212 %218 = fmul float %198, %209 %219 = fsub float %218, %213 %220 = fmul float %215, %117 %221 = fmul float %217, %117 %222 = fmul float %219, %117 %223 = fmul float %125, %125 %224 = fmul float %126, %126 %225 = fadd float %224, %223 %226 = fmul float %127, %127 %227 = fadd float %225, %226 %228 = call float @llvm.AMDGPU.rsq.clamped.f32(float %227) %229 = call float @llvm.minnum.f32(float %228, float 0x47EFFFFFE0000000) %230 = fmul float %125, %229 %231 = fmul float %126, %229 %232 = fmul float %127, %229 %233 = fmul float %149, %230 %234 = fmul float %150, %231 %235 = fadd float %234, %233 %236 = fmul float %151, %232 %237 = fadd float %235, %236 %238 = fmul float %237, %149 %239 = fmul float %237, %150 %240 = fmul float %237, %151 %241 = fmul float %238, 2.000000e+00 %242 = fsub float %241, %230 %243 = fmul float %239, 2.000000e+00 %244 = fsub float %243, %231 %245 = fmul float %240, 2.000000e+00 %246 = fsub float %245, %232 %247 = fmul float %220, %242 %248 = fmul float %221, %244 %249 = fadd float %248, %247 %250 = fmul float %222, %246 %251 = fadd float %249, %250 %252 = fmul float %208, %242 %253 = fmul float %209, %244 %254 = fadd float %253, %252 %255 = fmul float %210, %246 %256 = fadd float %254, %255 %257 = fmul float %198, %242 %258 = fmul float %199, %244 %259 = fadd float %258, %257 %260 = fmul float %200, %246 %261 = fadd float %259, %260 %262 = insertelement <4 x float> undef, float %256, i32 0 %263 = insertelement <4 x float> %262, float %251, i32 1 %264 = insertelement <4 x float> %263, float %261, i32 2 %265 = insertelement <4 x float> %264, float 0.000000e+00, i32 3 %266 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %265) %267 = extractelement <4 x float> %266, i32 0 %268 = extractelement <4 x float> %266, i32 1 %269 = extractelement <4 x float> %266, i32 2 %270 = extractelement <4 x float> %266, i32 3 %271 = call float @fabs(float %269) %272 = fdiv float 1.000000e+00, %271 %273 = fmul float %267, %272 %274 = fadd float %273, 1.500000e+00 %275 = fmul float %268, %272 %276 = fadd float %275, 1.500000e+00 %277 = bitcast float %276 to i32 %278 = bitcast float %274 to i32 %279 = bitcast float %270 to i32 %280 = insertelement <4 x i32> undef, i32 %277, i32 0 %281 = insertelement <4 x i32> %280, i32 %278, i32 1 %282 = insertelement <4 x i32> %281, i32 %279, i32 2 %283 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %282, <32 x i8> %107, <16 x i8> %110, i32 4) %284 = extractelement <4 x float> %283, i32 0 %285 = extractelement <4 x float> %283, i32 1 %286 = extractelement <4 x float> %283, i32 2 %287 = fmul float %44, %47 %288 = fmul float %45, %47 %289 = fmul float %46, %47 %290 = fmul float %284, %287 %291 = fmul float %285, %288 %292 = fmul float %286, %289 %293 = bitcast float %120 to i32 %294 = bitcast float %121 to i32 %295 = insertelement <2 x i32> undef, i32 %293, i32 0 %296 = insertelement <2 x i32> %295, i32 %294, i32 1 %297 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %296, <32 x i8> %95, <16 x i8> %98, i32 2) %298 = extractelement <4 x float> %297, i32 0 %299 = extractelement <4 x float> %297, i32 1 %300 = extractelement <4 x float> %297, i32 2 %301 = fmul float %290, %298 %302 = fmul float %291, %299 %303 = fmul float %292, %300 %304 = call float @llvm.AMDIL.clamp.(float %301, float 0.000000e+00, float 1.000000e+00) %305 = call float @llvm.AMDIL.clamp.(float %302, float 0.000000e+00, float 1.000000e+00) %306 = call float @llvm.AMDIL.clamp.(float %303, float 0.000000e+00, float 1.000000e+00) %307 = bitcast float %120 to i32 %308 = bitcast float %121 to i32 %309 = insertelement <2 x i32> undef, i32 %307, i32 0 %310 = insertelement <2 x i32> %309, i32 %308, i32 1 %311 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %310, <32 x i8> %101, <16 x i8> %104, i32 2) %312 = extractelement <4 x float> %311, i32 0 %313 = extractelement <4 x float> %311, i32 1 %314 = extractelement <4 x float> %311, i32 2 %315 = fadd float %304, %312 %316 = fadd float %305, %313 %317 = fadd float %306, %314 %318 = fsub float 1.000000e+00, %24 %319 = fsub float 1.000000e+00, %25 %320 = fsub float 1.000000e+00, %26 %321 = fmul float %315, %318 %322 = fmul float %316, %319 %323 = fmul float %317, %320 %324 = fmul float %321, %30 %325 = fadd float %324, %27 %326 = fmul float %322, %30 %327 = fadd float %326, %28 %328 = fmul float %323, %30 %329 = fadd float %328, %29 %330 = bitcast float %118 to i32 %331 = bitcast float %119 to i32 %332 = insertelement <2 x i32> undef, i32 %330, i32 0 %333 = insertelement <2 x i32> %332, i32 %331, i32 1 %334 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %333, <32 x i8> %54, <16 x i8> %56, i32 2) %335 = extractelement <4 x float> %334, i32 0 %336 = extractelement <4 x float> %334, i32 1 %337 = extractelement <4 x float> %334, i32 2 %338 = fmul float %335, %34 %339 = fmul float %336, %35 %340 = fmul float %337, %36 %341 = fmul float %325, %338 %342 = fmul float %327, %339 %343 = fmul float %329, %340 %344 = fmul float %338, %31 %345 = fmul float %339, %32 %346 = fmul float %340, %33 %347 = fmul float %244, 0x3FEA20BD80000000 %348 = fmul float %246, 0x3FE279A740000000 %349 = fadd float %347, %348 %350 = fadd float %349, 0.000000e+00 %351 = call float @llvm.AMDIL.clamp.(float %350, float 0.000000e+00, float 1.000000e+00) %352 = fmul float %242, 0xBFE6A09E60000000 %353 = fmul float %244, 0xBFDA20BD80000000 %354 = fadd float %353, %352 %355 = fmul float %246, 0x3FE279A740000000 %356 = fadd float %354, %355 %357 = call float @llvm.AMDIL.clamp.(float %356, float 0.000000e+00, float 1.000000e+00) %358 = fmul float %244, 0xBFDA20BD80000000 %359 = fmul float %246, 0x3FE279A740000000 %360 = fadd float %359, %358 %361 = fmul float %242, 0x3FE6A09E60000000 %362 = fadd float %360, %361 %363 = call float @llvm.AMDIL.clamp.(float %362, float 0.000000e+00, float 1.000000e+00) %364 = call float @llvm.maxnum.f32(float %351, float 0x3EB0C6F7A0000000) %365 = call float @llvm.maxnum.f32(float %357, float 0x3EB0C6F7A0000000) %366 = call float @llvm.maxnum.f32(float %363, float 0x3EB0C6F7A0000000) %367 = fmul float %364, %364 %368 = fmul float %365, %365 %369 = fmul float %366, %366 %370 = fmul float %367, %367 %371 = fmul float %368, %368 %372 = fmul float %369, %369 %373 = fmul float %370, %370 %374 = fmul float %371, %371 %375 = fmul float %372, %372 %376 = fmul float %373, %373 %377 = fmul float %374, %374 %378 = fmul float %375, %375 %379 = fmul float %183, %376 %380 = fmul float %184, %377 %381 = fadd float %380, %379 %382 = fmul float %185, %378 %383 = fadd float %381, %382 %384 = fmul float %383, %344 %385 = fmul float %383, %345 %386 = fmul float %383, %346 %387 = fmul float %341, %190 %388 = fadd float %387, %384 %389 = fmul float %342, %190 %390 = fadd float %389, %385 %391 = fmul float %343, %190 %392 = fadd float %391, %386 %393 = fmul float %48, 0x3FD99999A0000000 %394 = fmul float %48, 0x3FD99999A0000000 %395 = fmul float %49, 0x3FD99999A0000000 %396 = fmul float %49, 0x3FD99999A0000000 %397 = fmul float %393, %230 %398 = fadd float %397, %122 %399 = fmul float %394, %231 %400 = fadd float %399, %123 %401 = fmul float %395, %230 %402 = fadd float %401, %122 %403 = fmul float %396, %231 %404 = fadd float %403, %123 %405 = bitcast float %398 to i32 %406 = bitcast float %400 to i32 %407 = insertelement <2 x i32> undef, i32 %405, i32 0 %408 = insertelement <2 x i32> %407, i32 %406, i32 1 %409 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %408, <32 x i8> %77, <16 x i8> %80, i32 2) %410 = extractelement <4 x float> %409, i32 0 %411 = extractelement <4 x float> %409, i32 1 %412 = extractelement <4 x float> %409, i32 2 %413 = bitcast float %402 to i32 %414 = bitcast float %404 to i32 %415 = insertelement <2 x i32> undef, i32 %413, i32 0 %416 = insertelement <2 x i32> %415, i32 %414, i32 1 %417 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %416, <32 x i8> %83, <16 x i8> %86, i32 2) %418 = extractelement <4 x float> %417, i32 0 %419 = extractelement <4 x float> %417, i32 1 %420 = extractelement <4 x float> %417, i32 2 %421 = bitcast float %402 to i32 %422 = bitcast float %404 to i32 %423 = insertelement <2 x i32> undef, i32 %421, i32 0 %424 = insertelement <2 x i32> %423, i32 %422, i32 1 %425 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %424, <32 x i8> %89, <16 x i8> %92, i32 2) %426 = extractelement <4 x float> %425, i32 0 %427 = call float @llvm.AMDGPU.lrp(float %426, float %418, float %410) %428 = call float @llvm.AMDGPU.lrp(float %426, float %419, float %411) %429 = call float @llvm.AMDGPU.lrp(float %426, float %420, float %412) %430 = bitcast float %120 to i32 %431 = bitcast float %121 to i32 %432 = insertelement <2 x i32> undef, i32 %430, i32 0 %433 = insertelement <2 x i32> %432, i32 %431, i32 1 %434 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %433, <32 x i8> %71, <16 x i8> %74, i32 2) %435 = extractelement <4 x float> %434, i32 0 %436 = extractelement <4 x float> %434, i32 1 %437 = extractelement <4 x float> %434, i32 2 %438 = fmul float %427, %435 %439 = fmul float %428, %436 %440 = fmul float %429, %437 %441 = fmul float %40, %43 %442 = fmul float %41, %43 %443 = fmul float %42, %43 %444 = fmul float %438, %441 %445 = fadd float %444, %24 %446 = fmul float %439, %442 %447 = fadd float %446, %25 %448 = fmul float %440, %443 %449 = fadd float %448, %26 %450 = fadd float %388, %445 %451 = fadd float %390, %447 %452 = fadd float %392, %449 %453 = fmul float %325, %50 %454 = fadd float %453, %450 %455 = fmul float %327, %51 %456 = fadd float %455, %451 %457 = fmul float %329, %52 %458 = fadd float %457, %452 %459 = fmul float %124, 3.906250e-03 %460 = call i32 @llvm.SI.packf16(float %454, float %456) %461 = bitcast i32 %460 to float %462 = call i32 @llvm.SI.packf16(float %458, float %459) %463 = bitcast i32 %462 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %461, float %463, float %461, float %463) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 0, 4, [m0] ; C8341000 v_interp_p2_f32 v13, [v13], v1, 0, 4, [m0] ; C8351001 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v15, v0, 3, 5, [m0] ; C83C1700 v_interp_p2_f32 v15, [v15], v1, 3, 5, [m0] ; C83D1701 v_interp_p1_f32 v16, v0, 0, 6, [m0] ; C8401800 v_interp_p2_f32 v16, [v16], v1, 0, 6, [m0] ; C8411801 v_interp_p1_f32 v17, v0, 1, 6, [m0] ; C8441900 v_interp_p2_f32 v17, [v17], v1, 1, 6, [m0] ; C8451901 v_interp_p1_f32 v0, v0, 2, 6, [m0] ; C8001A00 v_interp_p2_f32 v0, [v0], v1, 2, 6, [m0] ; C8011A01 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718 s_load_dwordx8 s[20:27], s[6:7], 0x20 ; C0CA0720 s_load_dwordx4 s[28:31], s[4:5], 0x10 ; C08E0510 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[40:47], s[36:39] ; F0800700 012A120B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, 2.0, v18, -1.0 ; D2820001 03CE24F4 v_mad_f32 v18, 2.0, v19, -1.0 ; D2820012 03CE26F4 v_mad_f32 v19, 2.0, v20, -1.0 ; D2820013 03CE28F4 v_mul_f32_e32 v20, v1, v1 ; 10280301 v_mad_f32 v20, v18, v18, v20 ; D2820014 04522512 v_mad_f32 v20, v19, v19, v20 ; D2820014 04522713 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_min_f32_e32 v20, 0x7f7fffff, v20 ; 1E2828FF 7F7FFFFF v_mul_f32_e32 v21, v16, v16 ; 102A2110 v_mad_f32 v21, v17, v17, v21 ; D2820015 04562311 v_mad_f32 v21, v0, v0, v21 ; D2820015 04560100 v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v1, v20, v1 ; 10020314 v_mul_f32_e32 v18, v20, v18 ; 10242514 v_mul_f32_e32 v19, v20, v19 ; 10262714 v_min_f32_e32 v20, 0x7f7fffff, v21 ; 1E282AFF 7F7FFFFF v_mul_f32_e32 v21, v20, v16 ; 102A2114 v_mul_f32_e32 v22, v20, v17 ; 102C2314 v_mul_f32_e32 v23, v21, v1 ; 102E0315 v_mad_f32 v23, v18, v22, v23 ; D2820017 045E2D12 v_mul_f32_e32 v24, v20, v0 ; 10300114 v_mad_f32 v23, v19, v24, v23 ; D2820017 045E3113 v_mul_f32_e32 v24, v1, v23 ; 10302F01 v_mad_f32 v24, v23, v1, v24 ; D2820018 04620317 v_mad_f32 v16, -v16, v20, v24 ; D2820010 24622910 v_mul_f32_e32 v24, v18, v23 ; 10302F12 v_mad_f32 v24, v23, v18, v24 ; D2820018 04622517 v_mad_f32 v17, -v17, v20, v24 ; D2820011 24622911 v_mul_f32_e32 v24, v5, v5 ; 10300B05 v_mad_f32 v24, v6, v6, v24 ; D2820018 04620D06 v_mad_f32 v24, v7, v7, v24 ; D2820018 04620F07 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v25, v19, v23 ; 10322F13 v_mad_f32 v23, v23, v19, v25 ; D2820017 04662717 v_mad_f32 v0, -v0, v20, v23 ; D2820000 245E2900 v_min_f32_e32 v20, 0x7f7fffff, v24 ; 1E2830FF 7F7FFFFF v_mul_f32_e32 v23, v2, v2 ; 102E0502 v_mad_f32 v23, v3, v3, v23 ; D2820017 045E0703 v_mad_f32 v23, v4, v4, v23 ; D2820017 045E0904 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v5, v20, v5 ; 100A0B14 v_mul_f32_e32 v6, v20, v6 ; 100C0D14 v_mul_f32_e32 v7, v20, v7 ; 100E0F14 v_min_f32_e32 v20, 0x7f7fffff, v23 ; 1E282EFF 7F7FFFFF v_mul_f32_e32 v2, v20, v2 ; 10040514 v_mul_f32_e32 v3, v20, v3 ; 10060714 v_mul_f32_e32 v4, v20, v4 ; 10080914 v_mul_f32_e32 v20, v3, v7 ; 10280F03 v_mad_f32 v20, v6, v4, -v20 ; D2820014 84520906 v_mul_f32_e32 v23, v4, v5 ; 102E0B04 v_mad_f32 v23, v7, v2, -v23 ; D2820017 845E0507 v_mul_f32_e32 v24, v2, v6 ; 10300D02 v_mad_f32 v24, v5, v3, -v24 ; D2820018 84620705 v_mul_f32_e32 v20, v8, v20 ; 10282908 v_mul_f32_e32 v23, v8, v23 ; 102E2F08 v_mul_f32_e32 v8, v8, v24 ; 10103108 v_mul_f32_e32 v20, v16, v20 ; 10282910 v_mad_f32 v20, v23, v17, v20 ; D2820014 04522317 v_mov_b32_e32 v26, 0 ; 7E340280 v_mad_f32 v24, v8, v0, v20 ; D2820018 04520108 v_mul_f32_e32 v2, v16, v2 ; 10040510 v_mad_f32 v2, v3, v17, v2 ; D2820002 040A2303 v_mad_f32 v23, v4, v0, v2 ; D2820017 040A0104 v_mul_f32_e32 v2, v16, v5 ; 10040B10 v_mad_f32 v2, v6, v17, v2 ; D2820002 040A2306 v_mad_f32 v25, v7, v0, v2 ; D2820019 040A0107 v_cubeid_f32 v29, v23, v24, v25 ; D288001D 04663117 v_cubema_f32 v28, v23, v24, v25 ; D28E001C 04663117 v_cubesc_f32 v27, v23, v24, v25 ; D28A001B 04663117 v_cubetc_f32 v26, v23, v24, v25 ; D28C001A 04663117 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504 v_rcp_f32_e64 v2, |v28| ; D3540102 0000011C s_load_dwordx4 s[48:51], s[4:5], 0x1c ; C098051C s_load_dwordx4 s[52:55], s[4:5], 0x20 ; C09A0520 s_load_dwordx4 s[56:59], s[4:5], 0x24 ; C09C0524 s_load_dwordx8 s[60:67], s[6:7], 0x48 ; C0DE0748 s_load_dwordx8 s[68:75], s[6:7], 0x38 ; C0E20738 s_load_dwordx8 s[76:83], s[6:7], 0x40 ; C0E60740 s_load_dwordx8 s[84:91], s[6:7], 0x0 ; C0EA0700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:5], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[36:43], s[44:47] ; F0800700 01690309 v_mov_b32_e32 v6, 0x3fc00000 ; 7E0C02FF 3FC00000 v_mad_f32 v28, v26, v2, v6 ; D282001C 041A051A s_buffer_load_dword s40, s[0:3], 0x28 ; C2140128 v_mad_f32 v27, v27, v2, v6 ; D282001B 041A051B image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[60:67], s[56:59] ; F0800700 01CF061B image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[68:75], s[48:51] ; F0800700 0191170B image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[76:83], s[52:55] ; F0800700 01B31A0B image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[84:91], s[32:35] ; F0800700 01151D09 s_buffer_load_dword s41, s[0:3], 0x29 ; C2148129 s_load_dwordx4 s[36:39], s[4:5], 0x14 ; C0920514 s_load_dwordx4 s[32:35], s[4:5], 0x18 ; C0900518 v_mov_b32_e32 v2, 0x3ecccccd ; 7E0402FF 3ECCCCCD s_waitcnt vmcnt(4) lgkmcnt(0) ; BF8C0074 v_mul_f32_e32 v9, s40, v2 ; 10120428 v_mad_f32 v32, v9, v21, v13 ; D2820020 04362B09 s_load_dwordx8 s[44:51], s[6:7], 0x28 ; C0D60728 s_load_dwordx8 s[52:59], s[6:7], 0x30 ; C0DA0730 v_mad_f32 v33, v9, v22, v14 ; D2820021 043A2D09 image_sample v[32:34], 7, 0, 0, 0, 0, 0, 0, 0, v[32:33], s[20:27], s[28:31] ; F0800700 00E52020 s_buffer_load_dword s4, s[0:3], 0x2c ; C202012C s_buffer_load_dword s5, s[0:3], 0x2d ; C202812D s_buffer_load_dword s6, s[0:3], 0x2e ; C203012E s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v2, s41, v2 ; 10040429 v_mad_f32 v9, v2, v21, v13 ; D2820009 04362B02 v_mad_f32 v10, v2, v22, v14 ; D282000A 043A2D02 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[44:51], s[36:39] ; F0800700 012B1409 image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[52:59], s[32:35] ; F0800100 010D0209 s_buffer_load_dword s7, s[0:3], 0x27 ; C2038127 s_buffer_load_dword s20, s[0:3], 0x24 ; C20A0124 s_buffer_load_dword s21, s[0:3], 0x25 ; C20A8125 s_buffer_load_dword s22, s[0:3], 0x26 ; C20B0126 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v9, 1.0, v2 ; 081204F2 v_mul_f32_e32 v10, v32, v9 ; 10141320 v_mul_f32_e32 v13, v33, v9 ; 101A1321 v_mul_f32_e32 v9, v34, v9 ; 10121322 v_mad_f32 v10, v2, v20, v10 ; D282000A 042A2902 v_mad_f32 v13, v2, v21, v13 ; D282000D 04362B02 v_mad_f32 v2, v2, v22, v9 ; D2820002 04262D02 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v9, s7 ; 7E120207 v_mul_f32_e32 v9, s20, v9 ; 10121214 v_mov_b32_e32 v14, s7 ; 7E1C0207 v_mul_f32_e32 v14, s21, v14 ; 101C1C15 v_mov_b32_e32 v20, s7 ; 7E280207 v_mul_f32_e32 v20, s22, v20 ; 10282816 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v14, v7 ; 100E0F0E v_mul_f32_e32 v8, v20, v8 ; 10101114 v_mul_f32_e32 v6, v23, v6 ; 100C0D17 v_mul_f32_e32 v7, v24, v7 ; 100E0F18 v_mul_f32_e32 v8, v25, v8 ; 10101119 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_add_f32_e32 v6, v26, v6 ; 060C0D1A v_add_f32_e32 v7, v27, v7 ; 060E0F1B v_add_f32_e32 v8, v28, v8 ; 0610111C s_buffer_load_dword s20, s[0:3], 0x1d ; C20A011D s_buffer_load_dword s21, s[0:3], 0x1e ; C20A811E s_buffer_load_dword s22, s[0:3], 0x18 ; C20B0118 s_buffer_load_dword s23, s[0:3], 0x19 ; C20B8119 s_buffer_load_dword s24, s[0:3], 0x1a ; C20C011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s7, v3 ; 10060607 s_buffer_load_dword s7, s[0:3], 0x20 ; C2038120 s_buffer_load_dword s25, s[0:3], 0x21 ; C20C8121 s_buffer_load_dword s26, s[0:3], 0x22 ; C20D0122 s_buffer_load_dword s27, s[0:3], 0x23 ; C20D8123 v_mul_f32_e32 v4, s20, v4 ; 10080814 v_mul_f32_e32 v5, s21, v5 ; 100A0A15 v_mul_f32_e32 v9, s22, v29 ; 10123A16 v_mul_f32_e32 v14, s23, v30 ; 101C3C17 v_mul_f32_e32 v20, s24, v31 ; 10283E18 image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[12:19], s[8:11] ; F0800700 0043150B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v10, v21, v10 ; 10141515 v_mul_f32_e32 v11, v22, v13 ; 10161B16 v_mul_f32_e32 v2, v23, v2 ; 10040517 v_mov_b32_e32 v12, 0x3f13cd3a ; 7E1802FF 3F13CD3A v_mad_f32 v13, v19, v12, 0 ; D282000D 02021913 v_mov_b32_e32 v21, 0x3f5105ec ; 7E2A02FF 3F5105EC v_mad_f32 v13, v18, v21, v13 ; D282000D 04362B12 v_mov_b32_e32 v22, 0xbf3504f3 ; 7E2C02FF BF3504F3 v_mul_f32_e32 v23, v22, v1 ; 102E0316 v_mov_b32_e32 v24, 0xbed105ec ; 7E3002FF BED105EC v_mad_f32 v23, v18, v24, v23 ; D2820017 045E3112 v_mul_f32_e32 v18, v24, v18 ; 10242518 v_mad_f32 v23, v19, v12, v23 ; D2820017 045E1913 v_mad_f32 v18, v19, v12, v18 ; D2820012 044A1913 v_mad_f32 v19, v0, v12, 0 ; D2820013 02021900 v_mad_f32 v19, v17, v21, v19 ; D2820013 044E2B11 v_mul_f32_e32 v21, v22, v16 ; 102A2116 v_mad_f32 v21, v17, v24, v21 ; D2820015 04563111 v_mul_f32_e32 v17, v24, v17 ; 10222318 v_mad_f32 v21, v0, v12, v21 ; D2820015 04561900 v_mad_f32 v0, v0, v12, v17 ; D2820000 04461900 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s12, s[0:3], 0x11 ; C2060111 s_buffer_load_dword s13, s[0:3], 0x12 ; C2068112 s_buffer_load_dword s14, s[0:3], 0x13 ; C2070113 s_buffer_load_dword s15, s[0:3], 0x14 ; C2078114 s_buffer_load_dword s16, s[0:3], 0x15 ; C2080115 s_buffer_load_dword s0, s[0:3], 0x16 ; C2000116 v_mov_b32_e32 v12, 0x3f3504f3 ; 7E1802FF 3F3504F3 v_mad_f32 v1, v1, v12, v18 ; D2820001 044A1901 v_mad_f32 v0, v16, v12, v0 ; D2820000 04021910 v_add_f32_e64 v12, 0, v13 clamp ; D206080C 00021A80 v_add_f32_e64 v13, 0, v23 clamp ; D206080D 00022E80 v_mul_f32_e32 v12, v12, v12 ; 1018190C v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_max_f32_e32 v12, 0x358637bd, v12 ; 201818FF 358637BD v_max_f32_e32 v13, 0x358637bd, v13 ; 201A1AFF 358637BD v_mul_f32_e32 v12, v12, v3 ; 1018070C v_mad_f32 v12, v4, v13, v12 ; D282000C 04321B04 v_add_f32_e64 v13, 0, v19 clamp ; D206080D 00022680 v_max_f32_e32 v13, 0x358637bd, v13 ; 201A1AFF 358637BD v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mul_f32_e32 v3, v13, v3 ; 1006070D v_add_f32_e64 v13, 0, v21 clamp ; D206080D 00022A80 v_max_f32_e32 v13, 0x358637bd, v13 ; 201A1AFF 358637BD v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mad_f32 v3, v4, v13, v3 ; D2820003 040E1B04 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_max_f32_e32 v1, 0x358637bd, v1 ; 200202FF 358637BD v_mad_f32 v1, v5, v1, v12 ; D2820001 04320305 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v0, v5, v0, v3 ; D2820000 040E0105 v_mov_b32_e32 v3, s27 ; 7E06021B v_mul_f32_e32 v3, s7, v3 ; 10060607 v_mov_b32_e32 v4, s27 ; 7E08021B v_mul_f32_e32 v4, s25, v4 ; 10080819 v_mov_b32_e32 v5, s27 ; 7E0A021B v_mul_f32_e32 v5, s26, v5 ; 100A0A1A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v10, v3, s8 ; D2820003 0022070A v_mad_f32 v4, v11, v4, s9 ; D2820004 0026090B v_mad_f32 v2, v2, v5, s10 ; D2820002 002A0B02 v_mul_f32_e32 v5, s15, v9 ; 100A120F v_mad_f32 v3, v0, v5, v3 ; D2820003 040E0B00 v_mul_f32_e32 v5, s16, v14 ; 100A1C10 v_mad_f32 v4, v0, v5, v4 ; D2820004 04120B00 v_mul_f32_e32 v5, s0, v20 ; 100A2800 v_mad_f32 v0, v0, v5, v2 ; D2820000 040A0B00 v_sub_f32_e64 v2, 1.0, s8 ; D2080002 000010F2 v_mul_f32_e32 v2, v2, v6 ; 10040D02 v_sub_f32_e64 v5, 1.0, s9 ; D2080005 000012F2 v_mul_f32_e32 v5, v5, v7 ; 100A0F05 v_sub_f32_e64 v6, 1.0, s10 ; D2080006 000014F2 v_mul_f32_e32 v6, v6, v8 ; 100C1106 v_mov_b32_e32 v7, s11 ; 7E0E020B v_mad_f32 v2, s14, v2, v7 ; D2820002 041E040E v_mov_b32_e32 v7, s12 ; 7E0E020C v_mad_f32 v5, s14, v5, v7 ; D2820005 041E0A0E v_mov_b32_e32 v7, s13 ; 7E0E020D v_mad_f32 v6, s14, v6, v7 ; D2820006 041E0C0E v_mul_f32_e32 v7, v9, v2 ; 100E0509 v_mad_f32 v3, v7, v1, v3 ; D2820003 040E0307 v_mul_f32_e32 v7, v14, v5 ; 100E0B0E v_mad_f32 v4, v7, v1, v4 ; D2820004 04120307 v_mul_f32_e32 v7, v20, v6 ; 100E0D14 v_mad_f32 v0, v7, v1, v0 ; D2820000 04020307 v_mad_f32 v1, v2, s4, v3 ; D2820001 040C0902 v_mad_f32 v2, v5, s5, v4 ; D2820002 04100B05 v_mad_f32 v0, v6, s6, v0 ; D2820000 04000D06 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, 0x3b800000, v15 ; 10041EFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 36 Code Size: 1724 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[1], PERSPECTIVE DCL IN[4], TEXCOORD[4], PERSPECTIVE DCL IN[5], TEXCOORD[6], PERSPECTIVE DCL IN[6], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..9] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[1], IN[1] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[1], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[0], IN[0] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[0], TEMP[0].xxxx 8: MUL TEMP[3].xyz, TEMP[1].zxyw, TEMP[2].yzxw 9: MAD TEMP[3].xyz, TEMP[1].yzxw, TEMP[2].zxyw, -TEMP[3] 10: MUL TEMP[3].xyz, TEMP[3], IN[1].wwww 11: DP3 TEMP[0].x, IN[5], IN[5] 12: RSQ TEMP[0].x, TEMP[0].xxxx 13: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 14: MUL TEMP[4].xyz, IN[5], TEMP[0].xxxx 15: DP3 TEMP[0].x, IN[4], IN[4] 16: RSQ TEMP[0].x, TEMP[0].xxxx 17: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 18: MUL TEMP[5].xyz, IN[4], TEMP[0].xxxx 19: TEX TEMP[6], IN[3], SAMP[2], 2D 20: MAD TEMP[6].xyz, TEMP[6], IMM[0].xxxx, IMM[0].yyyy 21: DP3 TEMP[0].x, TEMP[6], TEMP[6] 22: RSQ TEMP[0].x, TEMP[0].xxxx 23: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 24: MUL TEMP[7].xyz, TEMP[6], TEMP[0].xxxx 25: DP3 TEMP[1].w, TEMP[7], TEMP[4] 26: MUL TEMP[6].xyz, TEMP[1].wwww, TEMP[7] 27: MAD TEMP[4].xyz, TEMP[6], IMM[0].xxxx, -TEMP[4] 28: TEX TEMP[6], IN[2], SAMP[6], 2D 29: ADD TEMP[1].w, TEMP[6].xxxx, CONST[15].xxxx 30: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[15].yyyy 31: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 32: POW TEMP[3].w, |TEMP[1].wwww|, CONST[15].zzzz 33: CMP TEMP[6].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].wwww 34: UIF CONST[240].xxxx :56 35: MUL TEMP[8].xyz, CONST[7].xyww, IN[6].yyyy 36: MAD TEMP[8].xyz, CONST[6].xyww, IN[6].xxxx, TEMP[8] 37: MAD TEMP[8].xyz, CONST[8].xyww, IN[6].zzzz, TEMP[8] 38: MAD TEMP[8].xyz, CONST[9].xyww, IN[6].wwww, TEMP[8] 39: RCP TEMP[1].w, TEMP[8].zzzz 40: MUL TEMP[8].xy, TEMP[1].wwww, TEMP[8] 41: MAD TEMP[8].xy, TEMP[8], CONST[1], CONST[1].wzzw 42: TEX TEMP[8], TEMP[8], SAMP[0], 2D 43: UIF CONST[240].yyyy :0 44: DP3 TEMP[1].w, IN[6], IN[6] 45: RSQ TEMP[0], |TEMP[1].wwww| 46: MIN TEMP[1].w, IMM[1].wwww, TEMP[0] 47: RCP TEMP[1].w, TEMP[1].wwww 48: ADD TEMP[1].w, -TEMP[1].wwww, CONST[0].xxxx 49: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[0].yyyy 50: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 51: LRP TEMP[9].xyz, TEMP[1].wwww, TEMP[8].wwww, TEMP[6].zzzz 52: ELSE :54 53: MOV TEMP[9].xyz, TEMP[6].zzzz 54: ENDIF 55: MUL TEMP[6].xyz, TEMP[8], TEMP[9] 56: ENDIF 57: MOV TEMP[8].y, IMM[0].yyyy 58: ADD TEMP[8].xyz, -TEMP[8].yyyy, -CONST[10] 59: TEX TEMP[9], IN[3], SAMP[3], 2D 60: MUL TEMP[9].yzw, CONST[11].xxyz, CONST[11].wwww 61: DP3 TEMP[2].x, TEMP[2], TEMP[4] 62: DP3 TEMP[2].y, TEMP[3], TEMP[4] 63: DP3 TEMP[2].z, TEMP[1], TEMP[4] 64: TEX TEMP[1], TEMP[2], SAMP[1], CUBE 65: MUL TEMP[1].xyz, TEMP[1], TEMP[9].yzww 66: MUL TEMP[2].xyz, CONST[12], CONST[12].wwww 67: TEX TEMP[3], IN[3], SAMP[4], 2D 68: MUL TEMP[2].xyz, TEMP[2], TEMP[3] 69: MAD TEMP[1].xyz, TEMP[9].xxxx, TEMP[1], TEMP[2] 70: MUL TEMP[1].xyz, TEMP[8], TEMP[1] 71: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 72: TEX TEMP[2], IN[3], SAMP[5], 2D 73: MAD TEMP[2].xyz, TEMP[2], CONST[5].wwww, CONST[5] 74: DP3_SAT TEMP[1].w, TEMP[7], TEMP[5] 75: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 76: DP3_SAT TEMP[3].x, TEMP[4], TEMP[5] 77: ADD TEMP[3].y, TEMP[3].xxxx, IMM[0].zzzz 78: POW TEMP[4].x, |TEMP[3].xxxx|, CONST[13].zzzz 79: MOV TEMP[3].z, CONST[13].zzzz 80: ADD TEMP[3].x, TEMP[3].zzzz, IMM[1].xxxx 81: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 82: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].yyyy 83: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 84: CMP TEMP[1].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[1] 85: MUL TEMP[2].xyz, TEMP[2], TEMP[3].xxxx 86: CMP TEMP[2].xyz, TEMP[3].yyyy, IMM[0].wwww, TEMP[2] 87: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 88: MUL TEMP[1].xyz, TEMP[6], TEMP[1] 89: MUL OUT[0].xyz, TEMP[1], CONST[14] 90: MOV OUT[0].w, IMM[0].wwww 91: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %72 = bitcast <8 x i32> addrspace(2)* %71 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %75 = bitcast <4 x i32> addrspace(2)* %74 to <16 x i8> addrspace(2)* %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %78 = bitcast <8 x i32> addrspace(2)* %77 to <32 x i8> addrspace(2)* %79 = load <32 x i8>, <32 x i8> addrspace(2)* %78, align 32, !tbaa !0 %80 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %81 = bitcast <4 x i32> addrspace(2)* %80 to <16 x i8> addrspace(2)* %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %84 = bitcast <8 x i32> addrspace(2)* %83 to <32 x i8> addrspace(2)* %85 = load <32 x i8>, <32 x i8> addrspace(2)* %84, align 32, !tbaa !0 %86 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %87 = bitcast <4 x i32> addrspace(2)* %86 to <16 x i8> addrspace(2)* %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %110 = fmul float %92, %92 %111 = fmul float %93, %93 %112 = fadd float %111, %110 %113 = fmul float %94, %94 %114 = fadd float %112, %113 %115 = call float @llvm.AMDGPU.rsq.clamped.f32(float %114) %116 = call float @llvm.minnum.f32(float %115, float 0x47EFFFFFE0000000) %117 = fmul float %92, %116 %118 = fmul float %93, %116 %119 = fmul float %94, %116 %120 = fmul float %89, %89 %121 = fmul float %90, %90 %122 = fadd float %121, %120 %123 = fmul float %91, %91 %124 = fadd float %122, %123 %125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124) %126 = call float @llvm.minnum.f32(float %125, float 0x47EFFFFFE0000000) %127 = fmul float %89, %126 %128 = fmul float %90, %126 %129 = fmul float %91, %126 %130 = fmul float %119, %128 %131 = fmul float %117, %129 %132 = fmul float %118, %127 %133 = fmul float %118, %129 %134 = fsub float %133, %130 %135 = fmul float %119, %127 %136 = fsub float %135, %131 %137 = fmul float %117, %128 %138 = fsub float %137, %132 %139 = fmul float %134, %95 %140 = fmul float %136, %95 %141 = fmul float %138, %95 %142 = fmul float %103, %103 %143 = fmul float %104, %104 %144 = fadd float %143, %142 %145 = fmul float %105, %105 %146 = fadd float %144, %145 %147 = call float @llvm.AMDGPU.rsq.clamped.f32(float %146) %148 = call float @llvm.minnum.f32(float %147, float 0x47EFFFFFE0000000) %149 = fmul float %103, %148 %150 = fmul float %104, %148 %151 = fmul float %105, %148 %152 = fmul float %100, %100 %153 = fmul float %101, %101 %154 = fadd float %153, %152 %155 = fmul float %102, %102 %156 = fadd float %154, %155 %157 = call float @llvm.AMDGPU.rsq.clamped.f32(float %156) %158 = call float @llvm.minnum.f32(float %157, float 0x47EFFFFFE0000000) %159 = fmul float %100, %158 %160 = fmul float %101, %158 %161 = fmul float %102, %158 %162 = bitcast float %98 to i32 %163 = bitcast float %99 to i32 %164 = insertelement <2 x i32> undef, i32 %162, i32 0 %165 = insertelement <2 x i32> %164, i32 %163, i32 1 %166 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %165, <32 x i8> %61, <16 x i8> %64, i32 2) %167 = extractelement <4 x float> %166, i32 0 %168 = extractelement <4 x float> %166, i32 1 %169 = extractelement <4 x float> %166, i32 2 %170 = fmul float %167, 2.000000e+00 %171 = fadd float %170, -1.000000e+00 %172 = fmul float %168, 2.000000e+00 %173 = fadd float %172, -1.000000e+00 %174 = fmul float %169, 2.000000e+00 %175 = fadd float %174, -1.000000e+00 %176 = fmul float %171, %171 %177 = fmul float %173, %173 %178 = fadd float %177, %176 %179 = fmul float %175, %175 %180 = fadd float %178, %179 %181 = call float @llvm.AMDGPU.rsq.clamped.f32(float %180) %182 = call float @llvm.minnum.f32(float %181, float 0x47EFFFFFE0000000) %183 = fmul float %171, %182 %184 = fmul float %173, %182 %185 = fmul float %175, %182 %186 = fmul float %183, %149 %187 = fmul float %184, %150 %188 = fadd float %187, %186 %189 = fmul float %185, %151 %190 = fadd float %188, %189 %191 = fmul float %190, %183 %192 = fmul float %190, %184 %193 = fmul float %190, %185 %194 = fmul float %191, 2.000000e+00 %195 = fsub float %194, %149 %196 = fmul float %192, 2.000000e+00 %197 = fsub float %196, %150 %198 = fmul float %193, 2.000000e+00 %199 = fsub float %198, %151 %200 = bitcast float %96 to i32 %201 = bitcast float %97 to i32 %202 = insertelement <2 x i32> undef, i32 %200, i32 0 %203 = insertelement <2 x i32> %202, i32 %201, i32 1 %204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %203, <32 x i8> %85, <16 x i8> %88, i32 2) %205 = extractelement <4 x float> %204, i32 0 %206 = fadd float %205, %49 %207 = fmul float %206, %50 %208 = call float @llvm.AMDIL.clamp.(float %207, float 0.000000e+00, float 1.000000e+00) %209 = fadd float %208, 0xBEB0C6F7A0000000 %210 = call float @fabs(float %208) %211 = call float @llvm.pow.f32(float %210, float %51) %212 = call float @llvm.AMDGPU.cndlt(float %209, float 0.000000e+00, float %211) %213 = call float @llvm.AMDGPU.cndlt(float %209, float 0.000000e+00, float %211) %214 = call float @llvm.AMDGPU.cndlt(float %209, float 0.000000e+00, float %211) %215 = bitcast float %52 to i32 %216 = icmp eq i32 %215, 0 br i1 %216, label %ENDIF, label %IF IF: ; preds = %main_body %217 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %218 = load <16 x i8>, <16 x i8> addrspace(2)* %217, align 16, !tbaa !0 %219 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %220 = load <32 x i8>, <32 x i8> addrspace(2)* %219, align 32, !tbaa !0 %221 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %222 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %223 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %224 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %225 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %226 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %227 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %228 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %229 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %230 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %231 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %232 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %233 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %234 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %235 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %236 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %237 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %238 = fmul float %230, %107 %239 = fmul float %229, %107 %240 = fmul float %228, %107 %241 = fmul float %233, %106 %242 = fadd float %241, %238 %243 = fmul float %232, %106 %244 = fadd float %243, %239 %245 = fmul float %231, %106 %246 = fadd float %245, %240 %247 = fmul float %227, %108 %248 = fadd float %247, %242 %249 = fmul float %226, %108 %250 = fadd float %249, %244 %251 = fmul float %225, %108 %252 = fadd float %251, %246 %253 = fmul float %224, %109 %254 = fadd float %253, %248 %255 = fmul float %223, %109 %256 = fadd float %255, %250 %257 = fmul float %222, %109 %258 = fadd float %257, %252 %259 = fdiv float 1.000000e+00, %258 %260 = fmul float %259, %254 %261 = fmul float %259, %256 %262 = fmul float %260, %237 %263 = fadd float %262, %234 %264 = fmul float %261, %236 %265 = fadd float %264, %235 %266 = bitcast float %263 to i32 %267 = bitcast float %265 to i32 %268 = insertelement <2 x i32> undef, i32 %266, i32 0 %269 = insertelement <2 x i32> %268, i32 %267, i32 1 %270 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %269, <32 x i8> %220, <16 x i8> %218, i32 2) %271 = extractelement <4 x float> %270, i32 0 %272 = extractelement <4 x float> %270, i32 1 %273 = extractelement <4 x float> %270, i32 2 %274 = extractelement <4 x float> %270, i32 3 %275 = bitcast float %221 to i32 %276 = icmp eq i32 %275, 0 br i1 %276, label %ENDIF40, label %IF41 ENDIF: ; preds = %main_body, %ENDIF40 %temp24.0 = phi float [ %435, %ENDIF40 ], [ %212, %main_body ] %temp25.0 = phi float [ %436, %ENDIF40 ], [ %213, %main_body ] %temp26.0 = phi float [ %437, %ENDIF40 ], [ %214, %main_body ] %277 = fsub float 1.000000e+00, %34 %278 = fsub float 1.000000e+00, %35 %279 = fsub float 1.000000e+00, %36 %280 = bitcast float %98 to i32 %281 = bitcast float %99 to i32 %282 = insertelement <2 x i32> undef, i32 %280, i32 0 %283 = insertelement <2 x i32> %282, i32 %281, i32 1 %284 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %283, <32 x i8> %67, <16 x i8> %70, i32 2) %285 = extractelement <4 x float> %284, i32 0 %286 = fmul float %37, %40 %287 = fmul float %38, %40 %288 = fmul float %39, %40 %289 = fmul float %127, %195 %290 = fmul float %128, %197 %291 = fadd float %290, %289 %292 = fmul float %129, %199 %293 = fadd float %291, %292 %294 = fmul float %139, %195 %295 = fmul float %140, %197 %296 = fadd float %295, %294 %297 = fmul float %141, %199 %298 = fadd float %296, %297 %299 = fmul float %117, %195 %300 = fmul float %118, %197 %301 = fadd float %300, %299 %302 = fmul float %119, %199 %303 = fadd float %301, %302 %304 = insertelement <4 x float> undef, float %293, i32 0 %305 = insertelement <4 x float> %304, float %298, i32 1 %306 = insertelement <4 x float> %305, float %303, i32 2 %307 = insertelement <4 x float> %306, float %209, i32 3 %308 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %307) %309 = extractelement <4 x float> %308, i32 0 %310 = extractelement <4 x float> %308, i32 1 %311 = extractelement <4 x float> %308, i32 2 %312 = extractelement <4 x float> %308, i32 3 %313 = call float @fabs(float %311) %314 = fdiv float 1.000000e+00, %313 %315 = fmul float %309, %314 %316 = fadd float %315, 1.500000e+00 %317 = fmul float %310, %314 %318 = fadd float %317, 1.500000e+00 %319 = bitcast float %318 to i32 %320 = bitcast float %316 to i32 %321 = bitcast float %312 to i32 %322 = insertelement <4 x i32> undef, i32 %319, i32 0 %323 = insertelement <4 x i32> %322, i32 %320, i32 1 %324 = insertelement <4 x i32> %323, i32 %321, i32 2 %325 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %324, <32 x i8> %55, <16 x i8> %58, i32 4) %326 = extractelement <4 x float> %325, i32 0 %327 = extractelement <4 x float> %325, i32 1 %328 = extractelement <4 x float> %325, i32 2 %329 = fmul float %326, %286 %330 = fmul float %327, %287 %331 = fmul float %328, %288 %332 = fmul float %41, %44 %333 = fmul float %42, %44 %334 = fmul float %43, %44 %335 = bitcast float %98 to i32 %336 = bitcast float %99 to i32 %337 = insertelement <2 x i32> undef, i32 %335, i32 0 %338 = insertelement <2 x i32> %337, i32 %336, i32 1 %339 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %338, <32 x i8> %73, <16 x i8> %76, i32 2) %340 = extractelement <4 x float> %339, i32 0 %341 = extractelement <4 x float> %339, i32 1 %342 = extractelement <4 x float> %339, i32 2 %343 = fmul float %332, %340 %344 = fmul float %333, %341 %345 = fmul float %334, %342 %346 = fmul float %285, %329 %347 = fadd float %346, %343 %348 = fmul float %285, %330 %349 = fadd float %348, %344 %350 = fmul float %285, %331 %351 = fadd float %350, %345 %352 = fmul float %277, %347 %353 = fmul float %278, %349 %354 = fmul float %279, %351 %355 = fmul float %352, %29 %356 = fadd float %355, %26 %357 = fmul float %353, %29 %358 = fadd float %357, %27 %359 = fmul float %354, %29 %360 = fadd float %359, %28 %361 = bitcast float %98 to i32 %362 = bitcast float %99 to i32 %363 = insertelement <2 x i32> undef, i32 %361, i32 0 %364 = insertelement <2 x i32> %363, i32 %362, i32 1 %365 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %364, <32 x i8> %79, <16 x i8> %82, i32 2) %366 = extractelement <4 x float> %365, i32 0 %367 = extractelement <4 x float> %365, i32 1 %368 = extractelement <4 x float> %365, i32 2 %369 = fmul float %366, %33 %370 = fadd float %369, %30 %371 = fmul float %367, %33 %372 = fadd float %371, %31 %373 = fmul float %368, %33 %374 = fadd float %373, %32 %375 = fmul float %183, %159 %376 = fmul float %184, %160 %377 = fadd float %376, %375 %378 = fmul float %185, %161 %379 = fadd float %377, %378 %380 = call float @llvm.AMDIL.clamp.(float %379, float 0.000000e+00, float 1.000000e+00) %381 = fadd float %380, 0xBEB0C6F7A0000000 %382 = fmul float %195, %159 %383 = fmul float %197, %160 %384 = fadd float %383, %382 %385 = fmul float %199, %161 %386 = fadd float %384, %385 %387 = call float @llvm.AMDIL.clamp.(float %386, float 0.000000e+00, float 1.000000e+00) %388 = fadd float %387, 0xBEB0C6F7A0000000 %389 = call float @fabs(float %387) %390 = call float @llvm.pow.f32(float %389, float %45) %391 = fadd float %45, 8.000000e+00 %392 = fmul float %391, %390 %393 = fmul float %392, 0x3FA45F3060000000 %394 = fmul float %380, %356 %395 = fmul float %380, %358 %396 = fmul float %380, %360 %397 = call float @llvm.AMDGPU.cndlt(float %381, float 0.000000e+00, float %394) %398 = call float @llvm.AMDGPU.cndlt(float %381, float 0.000000e+00, float %395) %399 = call float @llvm.AMDGPU.cndlt(float %381, float 0.000000e+00, float %396) %400 = fmul float %370, %393 %401 = fmul float %372, %393 %402 = fmul float %374, %393 %403 = call float @llvm.AMDGPU.cndlt(float %388, float 0.000000e+00, float %400) %404 = call float @llvm.AMDGPU.cndlt(float %388, float 0.000000e+00, float %401) %405 = call float @llvm.AMDGPU.cndlt(float %388, float 0.000000e+00, float %402) %406 = fadd float %397, %403 %407 = fadd float %398, %404 %408 = fadd float %399, %405 %409 = fmul float %temp24.0, %406 %410 = fmul float %temp25.0, %407 %411 = fmul float %temp26.0, %408 %412 = fmul float %409, %46 %413 = fmul float %410, %47 %414 = fmul float %411, %48 %415 = call i32 @llvm.SI.packf16(float %412, float %413) %416 = bitcast i32 %415 to float %417 = call i32 @llvm.SI.packf16(float %414, float 0.000000e+00) %418 = bitcast i32 %417 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %416, float %418, float %416, float %418) ret void IF41: ; preds = %IF %419 = fmul float %106, %106 %420 = fmul float %107, %107 %421 = fadd float %420, %419 %422 = fmul float %108, %108 %423 = fadd float %421, %422 %424 = call float @fabs(float %423) %425 = call float @llvm.AMDGPU.rsq.clamped.f32(float %424) %426 = call float @llvm.minnum.f32(float %425, float 0x47EFFFFFE0000000) %427 = fdiv float 1.000000e+00, %426 %428 = fsub float %24, %427 %429 = fmul float %428, %25 %430 = call float @llvm.AMDIL.clamp.(float %429, float 0.000000e+00, float 1.000000e+00) %431 = fmul float %430, %430 %432 = call float @llvm.AMDGPU.lrp(float %431, float %274, float %214) %433 = call float @llvm.AMDGPU.lrp(float %431, float %274, float %214) %434 = call float @llvm.AMDGPU.lrp(float %431, float %274, float %214) br label %ENDIF40 ENDIF40: ; preds = %IF, %IF41 %temp36.0 = phi float [ %432, %IF41 ], [ %214, %IF ] %temp37.0 = phi float [ %433, %IF41 ], [ %214, %IF ] %temp38.0 = phi float [ %434, %IF41 ], [ %214, %IF ] %435 = fmul float %271, %temp36.0 %436 = fmul float %272, %temp37.0 %437 = fmul float %273, %temp38.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000 v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001 v_interp_p1_f32 v13, v0, 1, 0, [m0] ; C8340100 v_interp_p2_f32 v13, [v13], v1, 1, 0, [m0] ; C8350101 v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200 v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201 v_interp_p1_f32 v15, v0, 0, 1, [m0] ; C83C0400 v_interp_p2_f32 v15, [v15], v1, 0, 1, [m0] ; C83D0401 v_interp_p1_f32 v18, v0, 1, 1, [m0] ; C8480500 v_interp_p2_f32 v18, [v18], v1, 1, 1, [m0] ; C8490501 v_interp_p1_f32 v19, v0, 2, 1, [m0] ; C84C0600 v_interp_p2_f32 v19, [v19], v1, 2, 1, [m0] ; C84D0601 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 v_interp_p1_f32 v16, v0, 0, 2, [m0] ; C8400800 v_interp_p2_f32 v16, [v16], v1, 0, 2, [m0] ; C8410801 v_interp_p1_f32 v17, v0, 1, 2, [m0] ; C8440900 v_interp_p2_f32 v17, [v17], v1, 1, 2, [m0] ; C8450901 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v6, v0, 0, 4, [m0] ; C8181000 v_interp_p2_f32 v6, [v6], v1, 0, 4, [m0] ; C8191001 v_interp_p1_f32 v7, v0, 1, 4, [m0] ; C81C1100 v_interp_p2_f32 v7, [v7], v1, 1, 4, [m0] ; C81D1101 v_interp_p1_f32 v8, v0, 2, 4, [m0] ; C8201200 v_interp_p2_f32 v8, [v8], v1, 2, 4, [m0] ; C8211201 v_interp_p1_f32 v10, v0, 0, 5, [m0] ; C8281400 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 s_load_dwordx4 s[20:23], s[4:5], 0x18 ; C08A0518 s_load_dwordx8 s[24:31], s[6:7], 0x30 ; C0CC0730 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s32, s[0:3], 0x3c ; C210013C s_buffer_load_dword s33, s[0:3], 0x3d ; C210813D v_interp_p2_f32 v10, [v10], v1, 0, 5, [m0] ; C8291401 v_interp_p1_f32 v11, v0, 1, 5, [m0] ; C82C1500 v_interp_p2_f32 v11, [v11], v1, 1, 5, [m0] ; C82D1501 v_interp_p1_f32 v9, v0, 2, 5, [m0] ; C8241600 v_interp_p2_f32 v9, [v9], v1, 2, 5, [m0] ; C8251601 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00431702 s_buffer_load_dword s8, s[0:3], 0x3e ; C204013E image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[20:23] ; F0800100 00A60410 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v4, s32, v4 ; 06080820 v_mul_f32_e32 v4, s33, v4 ; 10080821 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_and_b32_e32 v16, 0x7fffffff, v4 ; 362008FF 7FFFFFFF v_log_f32_e32 v16, v16 ; 7E204F10 v_mad_f32 v21, 2.0, v23, -1.0 ; D2820015 03CE2EF4 v_mad_f32 v22, 2.0, v24, -1.0 ; D2820016 03CE30F4 v_mad_f32 v23, 2.0, v25, -1.0 ; D2820017 03CE32F4 v_mul_legacy_f32_e32 v16, s8, v16 ; 0E202008 s_movk_i32 s8, 0xf00 ; B0080F00 s_buffer_load_dword s12, s[0:3], s8 ; C2060008 v_mul_f32_e32 v17, v15, v15 ; 10221F0F v_mad_f32 v17, v18, v18, v17 ; D2820011 04462512 v_mad_f32 v17, v19, v19, v17 ; D2820011 04462713 v_rsq_clamp_f32_e32 v24, v17 ; 7E305911 v_mul_f32_e32 v17, v12, v12 ; 1022190C v_mad_f32 v17, v13, v13, v17 ; D2820011 04461B0D v_mad_f32 v17, v14, v14, v17 ; D2820011 04461D0E v_rsq_clamp_f32_e32 v25, v17 ; 7E325911 v_mul_f32_e32 v17, v10, v10 ; 1022150A v_mad_f32 v17, v11, v11, v17 ; D2820011 0446170B v_mad_f32 v17, v9, v9, v17 ; D2820011 04461309 v_rsq_clamp_f32_e32 v26, v17 ; 7E345911 v_mul_f32_e32 v17, v6, v6 ; 10220D06 v_mad_f32 v17, v7, v7, v17 ; D2820011 04460F07 v_mad_f32 v17, v8, v8, v17 ; D2820011 04461108 v_rsq_clamp_f32_e32 v20, v17 ; 7E285911 s_buffer_load_dword s11, s[0:3], 0x13 ; C2058113 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 s_buffer_load_dword s9, s[0:3], 0x2f ; C204812F s_buffer_load_dword s8, s[0:3], 0x33 ; C2040133 v_mul_f32_e32 v17, v21, v21 ; 10222B15 v_mad_f32 v17, v22, v22, v17 ; D2820011 04462D16 v_mad_f32 v17, v23, v23, v17 ; D2820011 04462F17 v_rsq_clamp_f32_e32 v28, v17 ; 7E385911 v_exp_f32_e32 v17, v16 ; 7E224B10 v_mov_b32_e32 v16, 0xb58637bd ; 7E2002FF B58637BD v_add_f32_e32 v16, v4, v16 ; 06202104 v_cmp_gt_f32_e32 vcc, 0, v16 ; 7C082080 v_cndmask_b32_e64 v4, v17, 0, vcc ; D2000004 01A90111 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[12:13], 0, s12 ; D10A000C 00001880 v_mov_b32_e32 v16, v4 ; 7E200304 v_mov_b32_e32 v17, v4 ; 7E220304 s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_cbranch_execz BB0_4 ; BF880000 v_interp_p1_f32 v16, v0, 0, 6, [m0] ; C8401800 v_interp_p2_f32 v16, [v16], v1, 0, 6, [m0] ; C8411801 v_interp_p1_f32 v17, v0, 1, 6, [m0] ; C8441900 v_interp_p2_f32 v17, [v17], v1, 1, 6, [m0] ; C8451901 v_interp_p1_f32 v27, v0, 2, 6, [m0] ; C86C1A00 v_interp_p2_f32 v27, [v27], v1, 2, 6, [m0] ; C86D1A01 s_buffer_load_dword s14, s[0:3], 0x19 ; C2070119 s_buffer_load_dword s15, s[0:3], 0x1b ; C207811B s_buffer_load_dword s16, s[0:3], 0x1c ; C208011C s_buffer_load_dword s17, s[0:3], 0x1d ; C208811D s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s19, s[0:3], 0x5 ; C2098105 s_buffer_load_dword s20, s[0:3], 0x6 ; C20A0106 s_buffer_load_dword s21, s[0:3], 0x7 ; C20A8107 s_buffer_load_dword s22, s[0:3], 0x18 ; C20B0118 v_interp_p1_f32 v0, v0, 3, 6, [m0] ; C8001B00 v_interp_p2_f32 v0, [v0], v1, 3, 6, [m0] ; C8011B01 s_buffer_load_dword s23, s[0:3], 0x27 ; C20B8127 s_buffer_load_dword s24, s[0:3], 0x1f ; C20C011F s_buffer_load_dword s25, s[0:3], 0x20 ; C20C8120 s_buffer_load_dword s26, s[0:3], 0x21 ; C20D0121 s_buffer_load_dword s27, s[0:3], 0x23 ; C20D8123 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v17 ; 10022210 v_mul_f32_e32 v29, s17, v17 ; 103A2211 v_mad_f32 v1, s22, v16, v1 ; D2820001 04062016 v_mad_f32 v29, s14, v16, v29 ; D282001D 0476200E s_buffer_load_dword s14, s[0:3], 0x24 ; C2070124 s_buffer_load_dword s16, s[0:3], 0x25 ; C2080125 v_mul_f32_e32 v30, s24, v17 ; 103C2218 v_mad_f32 v30, s15, v16, v30 ; D282001E 047A200F v_mad_f32 v1, s25, v27, v1 ; D2820001 04063619 v_mad_f32 v30, s27, v27, v30 ; D282001E 047A361B v_mad_f32 v30, s23, v0, v30 ; D282001E 047A0017 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_movk_i32 s15, 0xf04 ; B00F0F04 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_buffer_load_dword s15, s[0:3], s15 ; C207800F v_rcp_f32_e32 v30, v30 ; 7E3C551E v_mad_f32 v29, s26, v27, v29 ; D282001D 0476361A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s14, v0, v1 ; D2820001 0406000E v_mad_f32 v0, s16, v0, v29 ; D2820000 04760010 v_mul_f32_e32 v1, v1, v30 ; 10023D01 v_mul_f32_e32 v0, v0, v30 ; 10003D00 v_mov_b32_e32 v29, s21 ; 7E3A0215 v_mad_f32 v29, s18, v1, v29 ; D282001D 04760212 v_mov_b32_e32 v1, s20 ; 7E020214 v_mad_f32 v30, s19, v0, v1 ; D282001E 04060013 image_sample v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[32:39], s[28:31] ; F0800F00 00E81D1D v_cmp_ne_i32_e64 s[14:15], 0, s15 ; D10A000E 00001E80 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[14:15] ; BE8E240E s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v16, v16 ; 10002110 v_mad_f32 v0, v17, v17, v0 ; D2820000 04022311 v_mad_f32 v0, v27, v27, v0 ; D2820000 0402371B v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s16, v0 ; 08000010 v_mul_f32_e32 v0, s17, v0 ; 10000011 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v1, v0, v0 ; 10020100 v_mad_f32 v0, -v0, v0, 1.0 ; D2820000 23CA0100 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mad_f32 v4, v1, v32, v0 ; D2820004 04024101 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E v_mul_f32_e32 v17, v4, v29 ; 10223B04 v_mul_f32_e32 v16, v4, v30 ; 10203D04 v_mul_f32_e32 v4, v4, v31 ; 10083F04 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mov_b32_e32 v0, s11 ; 7E00020B v_min_f32_e32 v1, 0x7f7fffff, v24 ; 1E0230FF 7F7FFFFF v_mul_f32_e32 v27, v1, v15 ; 10361F01 v_mul_f32_e32 v24, v1, v18 ; 10302501 v_mul_f32_e32 v15, v1, v19 ; 101E2701 v_mov_b32_e32 v19, s10 ; 7E26020A v_min_f32_e32 v1, 0x7f7fffff, v25 ; 1E0232FF 7F7FFFFF v_mul_f32_e32 v25, v1, v12 ; 10321901 v_mul_f32_e32 v29, v1, v13 ; 103A1B01 v_mul_f32_e32 v13, v1, v14 ; 101A1D01 v_mov_b32_e32 v14, s9 ; 7E1C0209 v_min_f32_e32 v26, 0x7f7fffff, v26 ; 1E3434FF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v28 ; 1E0238FF 7F7FFFFF v_mul_f32_e32 v18, v1, v21 ; 10242B01 v_mul_f32_e32 v12, v1, v22 ; 10182D01 v_mul_f32_e32 v1, v1, v23 ; 10022F01 v_mul_f32_e32 v21, v26, v10 ; 102A151A v_mul_f32_e32 v21, v21, v18 ; 102A2515 v_mul_f32_e32 v22, v26, v11 ; 102C171A v_mad_f32 v21, v12, v22, v21 ; D2820015 04562D0C v_mul_f32_e32 v22, v26, v9 ; 102C131A v_mad_f32 v21, v1, v22, v21 ; D2820015 04562D01 v_mul_f32_e32 v22, v18, v21 ; 102C2B12 v_mad_f32 v22, v21, v18, v22 ; D2820016 045A2515 v_mad_f32 v10, -v10, v26, v22 ; D282000A 245A350A v_mul_f32_e32 v22, v12, v21 ; 102C2B0C v_mad_f32 v22, v21, v12, v22 ; D2820016 045A1915 v_mad_f32 v11, -v11, v26, v22 ; D282000B 245A350B v_mul_f32_e32 v22, v1, v21 ; 102C2B01 v_mad_f32 v21, v21, v1, v22 ; D2820015 045A0315 s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114 s_buffer_load_dword s16, s[0:3], 0x15 ; C2080115 s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116 s_buffer_load_dword s62, s[0:3], 0x28 ; C21F0128 s_buffer_load_dword s61, s[0:3], 0x29 ; C21E8129 s_buffer_load_dword s60, s[0:3], 0x2a ; C21E012A s_buffer_load_dword s59, s[0:3], 0x2c ; C21D812C s_buffer_load_dword s58, s[0:3], 0x2d ; C21D012D s_buffer_load_dword s18, s[0:3], 0x2e ; C209012E s_buffer_load_dword s19, s[0:3], 0x30 ; C2098130 s_buffer_load_dword s56, s[0:3], 0x31 ; C21C0131 s_buffer_load_dword s57, s[0:3], 0x32 ; C21C8132 s_buffer_load_dword s11, s[0:3], 0x36 ; C2058136 s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138 s_buffer_load_dword s9, s[0:3], 0x39 ; C2048139 s_buffer_load_dword s0, s[0:3], 0x3a ; C200013A v_mad_f32 v9, -v9, v26, v21 ; D2820009 24563509 v_mul_f32_e32 v21, v29, v15 ; 102A1F1D v_mad_f32 v21, v24, v13, -v21 ; D2820015 84561B18 v_mul_f32_e32 v22, v13, v27 ; 102C370D v_mad_f32 v23, v15, v25, -v22 ; D2820017 845A330F s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504 s_load_dwordx4 s[64:67], s[4:5], 0xc ; C0A0050C s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510 s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514 v_mov_b32_e32 v22, s8 ; 7E2C0208 s_load_dwordx8 s[48:55], s[6:7], 0x8 ; C0D80708 s_load_dwordx8 s[68:75], s[6:7], 0x18 ; C0E20718 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 s_load_dwordx8 s[24:31], s[6:7], 0x28 ; C0CC0728 v_mul_f32_e32 v26, v25, v24 ; 10343119 v_mad_f32 v28, v27, v29, -v26 ; D282001C 846A3B1B v_mul_f32_e32 v26, v5, v21 ; 10342B05 v_mul_f32_e32 v23, v5, v23 ; 102E2F05 v_mul_f32_e32 v21, v5, v28 ; 102A3905 v_min_f32_e32 v5, 0x7f7fffff, v20 ; 1E0A28FF 7F7FFFFF v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_mul_f32_e32 v5, v5, v8 ; 100A1105 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[68:75], s[64:67] ; F0800100 02110802 v_sub_f32_e64 v20, 1.0, s62 ; D2080014 00007CF2 v_sub_f32_e64 v28, 1.0, s61 ; D208001C 00007AF2 v_sub_f32_e64 v30, 1.0, s60 ; D208001E 000078F2 v_mul_f32_e32 v31, s59, v14 ; 103E1C3B v_mul_f32_e32 v32, s58, v14 ; 10401C3A v_mul_f32_e32 v33, s18, v14 ; 10421C12 v_mul_f32_e32 v34, s19, v22 ; 10442C13 v_mul_f32_e32 v35, s56, v22 ; 10462C38 v_mul_f32_e32 v22, s57, v22 ; 102C2C39 v_mul_f32_e32 v14, v10, v25 ; 101C330A v_mad_f32 v14, v29, v11, v14 ; D282000E 043A171D v_mad_f32 v13, v13, v9, v14 ; D282000D 043A130D v_mul_f32_e32 v14, v10, v26 ; 101C350A v_mad_f32 v14, v23, v11, v14 ; D282000E 043A1717 v_mad_f32 v14, v21, v9, v14 ; D282000E 043A1315 v_mul_f32_e32 v21, v10, v27 ; 102A370A v_mad_f32 v21, v24, v11, v21 ; D2820015 04561718 v_mad_f32 v15, v15, v9, v21 ; D282000F 0456130F v_mov_b32_e32 v21, 0x3fc00000 ; 7E2A02FF 3FC00000 v_cubeid_f32 v39, v13, v14, v15 ; D2880027 043E1D0D v_cubema_f32 v38, v13, v14, v15 ; D28E0026 043E1D0D v_cubesc_f32 v37, v13, v14, v15 ; D28A0025 043E1D0D v_cubetc_f32 v36, v13, v14, v15 ; D28C0024 043E1D0D v_rcp_f32_e64 v13, |v38| ; D354010D 00000126 v_mad_f32 v38, v36, v13, v21 ; D2820026 04561B24 v_mad_f32 v37, v37, v13, v21 ; D2820025 04561B25 image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[48:55], s[44:47] ; F0800700 016C0D25 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, v31, v13 ; 101A1B1F v_mul_f32_e32 v14, v32, v14 ; 101C1D20 v_mul_f32_e32 v15, v33, v15 ; 101E1F21 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[32:35] ; F0800700 01091702 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v23, v34 ; 102A4517 v_mul_f32_e32 v23, v24, v35 ; 102E4718 v_mul_f32_e32 v22, v25, v22 ; 102C2D19 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800700 00A61802 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v19, v24, s17 ; D2820002 00463113 v_mad_f32 v3, v25, v19, s16 ; D2820003 00422719 v_mad_f32 v19, v26, v19, s15 ; D2820013 003E271A v_mad_f32 v13, v8, v13, v21 ; D282000D 04561B08 v_mad_f32 v14, v8, v14, v23 ; D282000E 045E1D08 v_mad_f32 v8, v8, v15, v22 ; D2820008 045A1F08 v_mul_f32_e32 v13, v13, v20 ; 101A290D v_mul_f32_e32 v14, v14, v28 ; 101C390E v_mul_f32_e32 v8, v8, v30 ; 10103D08 v_mad_f32 v13, v0, v13, s12 ; D282000D 00321B00 v_mad_f32 v14, v14, v0, s13 ; D282000E 0036010E v_mad_f32 v0, v8, v0, s14 ; D2820000 003A0108 v_mul_f32_e32 v8, v6, v18 ; 10102506 v_mad_f32 v8, v12, v7, v8 ; D2820008 04220F0C v_mul_f32_e32 v6, v6, v10 ; 100C1506 v_mad_f32 v6, v11, v7, v6 ; D2820006 041A0F0B v_mad_f32 v1, v1, v5, v8 ; D2820001 04220B01 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mad_f32 v5, v9, v5, v6 ; D2820005 041A0B09 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_and_b32_e32 v6, 0x7fffffff, v5 ; 360C0AFF 7FFFFFFF v_log_f32_e32 v6, v6 ; 7E0C4F06 v_mov_b32_e32 v7, 0xb58637bd ; 7E0E02FF B58637BD v_add_f32_e32 v8, v7, v1 ; 06100307 v_add_f32_e32 v5, v7, v5 ; 060A0B07 v_mul_legacy_f32_e32 v6, s11, v6 ; 0E0C0C0B v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mov_b32_e32 v7, 0x41000000 ; 7E0E02FF 41000000 v_add_f32_e32 v7, s11, v7 ; 060E0E0B v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_mul_f32_e32 v6, 0x3d22f983, v6 ; 100C0CFF 3D22F983 v_mul_f32_e32 v7, v13, v1 ; 100E030D v_mul_f32_e32 v9, v14, v1 ; 1012030E v_mul_f32_e32 v0, v0, v1 ; 10000300 v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_cndmask_b32_e64 v1, v7, 0, vcc ; D2000001 01A90107 v_cndmask_b32_e64 v7, v9, 0, vcc ; D2000007 01A90109 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v6, v6, v19 ; 100C2706 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_cndmask_b32_e64 v5, v6, 0, vcc ; D2000005 01A90106 v_add_f32_e32 v1, v2, v1 ; 06020302 v_add_f32_e32 v2, v3, v7 ; 06040F03 v_add_f32_e32 v0, v5, v0 ; 06000105 v_mul_f32_e32 v1, v1, v17 ; 10022301 v_mul_f32_e32 v2, v2, v16 ; 10042102 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mul_f32_e32 v1, s10, v1 ; 1002020A v_mul_f32_e32 v2, s9, v2 ; 10040409 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 44 Code Size: 1784 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..7] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[2], IN[2] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[2], TEMP[0].xxxx 8: TEX TEMP[3], IN[1], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: TEX TEMP[3], IN[0], SAMP[4], 2D 18: ADD TEMP[1].w, TEMP[3].xxxx, CONST[14].xxxx 19: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[14].yyyy 20: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 21: POW TEMP[3].x, |TEMP[1].wwww|, CONST[14].zzzz 22: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].xxxx 23: UIF CONST[240].xxxx :45 24: MUL TEMP[5].xyz, CONST[7].xyww, IN[4].yyyy 25: MAD TEMP[5].xyz, CONST[6].xyww, IN[4].xxxx, TEMP[5] 26: MAD TEMP[5].xyz, CONST[8].xyww, IN[4].zzzz, TEMP[5] 27: MAD TEMP[5].xyz, CONST[9].xyww, IN[4].wwww, TEMP[5] 28: RCP TEMP[1].w, TEMP[5].zzzz 29: MUL TEMP[5].xy, TEMP[1].wwww, TEMP[5] 30: MAD TEMP[5].xy, TEMP[5], CONST[1], CONST[1].wzzw 31: TEX TEMP[5], TEMP[5], SAMP[0], 2D 32: UIF CONST[240].yyyy :0 33: DP3 TEMP[1].w, IN[4], IN[4] 34: RSQ TEMP[0], |TEMP[1].wwww| 35: MIN TEMP[1].w, IMM[1].wwww, TEMP[0] 36: RCP TEMP[1].w, TEMP[1].wwww 37: ADD TEMP[1].w, -TEMP[1].wwww, CONST[0].xxxx 38: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[0].yyyy 39: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 40: LRP TEMP[6].xyz, TEMP[1].wwww, TEMP[5].wwww, TEMP[3].zzzz 41: ELSE :43 42: MOV TEMP[6].xyz, TEMP[3].zzzz 43: ENDIF 44: MUL TEMP[3].xyz, TEMP[5], TEMP[6] 45: ENDIF 46: MOV TEMP[5].y, IMM[0].yyyy 47: ADD TEMP[5].xyz, -TEMP[5].yyyy, -CONST[10] 48: TEX TEMP[6], IN[1], SAMP[2], 2D 49: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 50: MAD TEMP[5].xyz, TEMP[5], CONST[4].wwww, CONST[4] 51: MUL TEMP[6].xyz, CONST[11], CONST[11].wwww 52: TEX TEMP[7], IN[1], SAMP[3], 2D 53: MUL TEMP[6].xyz, TEMP[6], TEMP[7].zzzz 54: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 55: DP3_SAT TEMP[1].w, TEMP[4], TEMP[2] 56: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 57: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 58: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].zzzz 59: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[12].xxxx 60: MOV TEMP[1].x, CONST[12].xxxx 61: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 62: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 63: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 64: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[5] 65: CMP TEMP[2].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[2] 66: MUL TEMP[1].xzw, TEMP[6].xyyz, TEMP[1].xxxx 67: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[0].wwww, TEMP[1].xzww 68: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 69: MUL TEMP[1].xyz, TEMP[3], TEMP[1] 70: MUL OUT[0].xyz, TEMP[1], CONST[13] 71: MOV OUT[0].w, IMM[0].wwww 72: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %50 = bitcast <8 x i32> addrspace(2)* %49 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %53 = bitcast <4 x i32> addrspace(2)* %52 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %87 = fmul float %80, %80 %88 = fmul float %81, %81 %89 = fadd float %88, %87 %90 = fmul float %82, %82 %91 = fadd float %89, %90 %92 = call float @llvm.AMDGPU.rsq.clamped.f32(float %91) %93 = call float @llvm.minnum.f32(float %92, float 0x47EFFFFFE0000000) %94 = fmul float %80, %93 %95 = fmul float %81, %93 %96 = fmul float %82, %93 %97 = fmul float %77, %77 %98 = fmul float %78, %78 %99 = fadd float %98, %97 %100 = fmul float %79, %79 %101 = fadd float %99, %100 %102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101) %103 = call float @llvm.minnum.f32(float %102, float 0x47EFFFFFE0000000) %104 = fmul float %77, %103 %105 = fmul float %78, %103 %106 = fmul float %79, %103 %107 = bitcast float %75 to i32 %108 = bitcast float %76 to i32 %109 = insertelement <2 x i32> undef, i32 %107, i32 0 %110 = insertelement <2 x i32> %109, i32 %108, i32 1 %111 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %110, <32 x i8> %51, <16 x i8> %54, i32 2) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = extractelement <4 x float> %111, i32 2 %115 = fmul float %112, 2.000000e+00 %116 = fadd float %115, -1.000000e+00 %117 = fmul float %113, 2.000000e+00 %118 = fadd float %117, -1.000000e+00 %119 = fmul float %114, 2.000000e+00 %120 = fadd float %119, -1.000000e+00 %121 = fmul float %116, %116 %122 = fmul float %118, %118 %123 = fadd float %122, %121 %124 = fmul float %120, %120 %125 = fadd float %123, %124 %126 = call float @llvm.AMDGPU.rsq.clamped.f32(float %125) %127 = call float @llvm.minnum.f32(float %126, float 0x47EFFFFFE0000000) %128 = fmul float %116, %127 %129 = fmul float %118, %127 %130 = fmul float %120, %127 %131 = fmul float %128, %94 %132 = fmul float %129, %95 %133 = fadd float %132, %131 %134 = fmul float %130, %96 %135 = fadd float %133, %134 %136 = fmul float %135, %128 %137 = fmul float %135, %129 %138 = fmul float %135, %130 %139 = fmul float %136, 2.000000e+00 %140 = fsub float %139, %94 %141 = fmul float %137, 2.000000e+00 %142 = fsub float %141, %95 %143 = fmul float %138, 2.000000e+00 %144 = fsub float %143, %96 %145 = bitcast float %73 to i32 %146 = bitcast float %74 to i32 %147 = insertelement <2 x i32> undef, i32 %145, i32 0 %148 = insertelement <2 x i32> %147, i32 %146, i32 1 %149 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %148, <32 x i8> %69, <16 x i8> %72, i32 2) %150 = extractelement <4 x float> %149, i32 0 %151 = fadd float %150, %45 %152 = fmul float %151, %46 %153 = call float @llvm.AMDIL.clamp.(float %152, float 0.000000e+00, float 1.000000e+00) %154 = fadd float %153, 0xBEB0C6F7A0000000 %155 = call float @fabs(float %153) %156 = call float @llvm.pow.f32(float %155, float %47) %157 = call float @llvm.AMDGPU.cndlt(float %154, float 0.000000e+00, float %156) %158 = call float @llvm.AMDGPU.cndlt(float %154, float 0.000000e+00, float %156) %159 = call float @llvm.AMDGPU.cndlt(float %154, float 0.000000e+00, float %156) %160 = bitcast float %48 to i32 %161 = icmp eq i32 %160, 0 br i1 %161, label %ENDIF, label %IF IF: ; preds = %main_body %162 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %163 = load <16 x i8>, <16 x i8> addrspace(2)* %162, align 16, !tbaa !0 %164 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %165 = load <32 x i8>, <32 x i8> addrspace(2)* %164, align 32, !tbaa !0 %166 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %167 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %168 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %169 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %170 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %171 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %172 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %173 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %174 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %175 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %176 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %177 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %178 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %179 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %180 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %181 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %182 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %183 = fmul float %175, %84 %184 = fmul float %174, %84 %185 = fmul float %173, %84 %186 = fmul float %178, %83 %187 = fadd float %186, %183 %188 = fmul float %177, %83 %189 = fadd float %188, %184 %190 = fmul float %176, %83 %191 = fadd float %190, %185 %192 = fmul float %172, %85 %193 = fadd float %192, %187 %194 = fmul float %171, %85 %195 = fadd float %194, %189 %196 = fmul float %170, %85 %197 = fadd float %196, %191 %198 = fmul float %169, %86 %199 = fadd float %198, %193 %200 = fmul float %168, %86 %201 = fadd float %200, %195 %202 = fmul float %167, %86 %203 = fadd float %202, %197 %204 = fdiv float 1.000000e+00, %203 %205 = fmul float %204, %199 %206 = fmul float %204, %201 %207 = fmul float %205, %182 %208 = fadd float %207, %179 %209 = fmul float %206, %181 %210 = fadd float %209, %180 %211 = bitcast float %208 to i32 %212 = bitcast float %210 to i32 %213 = insertelement <2 x i32> undef, i32 %211, i32 0 %214 = insertelement <2 x i32> %213, i32 %212, i32 1 %215 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %214, <32 x i8> %165, <16 x i8> %163, i32 2) %216 = extractelement <4 x float> %215, i32 0 %217 = extractelement <4 x float> %215, i32 1 %218 = extractelement <4 x float> %215, i32 2 %219 = extractelement <4 x float> %215, i32 3 %220 = bitcast float %166 to i32 %221 = icmp eq i32 %220, 0 br i1 %221, label %ENDIF32, label %IF33 ENDIF: ; preds = %main_body, %ENDIF32 %temp12.0 = phi float [ %320, %ENDIF32 ], [ %157, %main_body ] %temp13.0 = phi float [ %321, %ENDIF32 ], [ %158, %main_body ] %temp14.0 = phi float [ %322, %ENDIF32 ], [ %159, %main_body ] %222 = fsub float 1.000000e+00, %34 %223 = fsub float 1.000000e+00, %35 %224 = fsub float 1.000000e+00, %36 %225 = bitcast float %75 to i32 %226 = bitcast float %76 to i32 %227 = insertelement <2 x i32> undef, i32 %225, i32 0 %228 = insertelement <2 x i32> %227, i32 %226, i32 1 %229 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %57, <16 x i8> %60, i32 2) %230 = extractelement <4 x float> %229, i32 0 %231 = extractelement <4 x float> %229, i32 1 %232 = extractelement <4 x float> %229, i32 2 %233 = fmul float %222, %230 %234 = fmul float %223, %231 %235 = fmul float %224, %232 %236 = fmul float %233, %29 %237 = fadd float %236, %26 %238 = fmul float %234, %29 %239 = fadd float %238, %27 %240 = fmul float %235, %29 %241 = fadd float %240, %28 %242 = fmul float %37, %40 %243 = fmul float %38, %40 %244 = fmul float %39, %40 %245 = bitcast float %75 to i32 %246 = bitcast float %76 to i32 %247 = insertelement <2 x i32> undef, i32 %245, i32 0 %248 = insertelement <2 x i32> %247, i32 %246, i32 1 %249 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %248, <32 x i8> %63, <16 x i8> %66, i32 2) %250 = extractelement <4 x float> %249, i32 2 %251 = fmul float %242, %250 %252 = fmul float %243, %250 %253 = fmul float %244, %250 %254 = fmul float %251, %33 %255 = fadd float %254, %30 %256 = fmul float %252, %33 %257 = fadd float %256, %31 %258 = fmul float %253, %33 %259 = fadd float %258, %32 %260 = fmul float %128, %104 %261 = fmul float %129, %105 %262 = fadd float %261, %260 %263 = fmul float %130, %106 %264 = fadd float %262, %263 %265 = call float @llvm.AMDIL.clamp.(float %264, float 0.000000e+00, float 1.000000e+00) %266 = fadd float %265, 0xBEB0C6F7A0000000 %267 = fmul float %140, %104 %268 = fmul float %142, %105 %269 = fadd float %268, %267 %270 = fmul float %144, %106 %271 = fadd float %269, %270 %272 = call float @llvm.AMDIL.clamp.(float %271, float 0.000000e+00, float 1.000000e+00) %273 = fadd float %272, 0xBEB0C6F7A0000000 %274 = call float @fabs(float %272) %275 = call float @llvm.pow.f32(float %274, float %41) %276 = fadd float %41, 8.000000e+00 %277 = fmul float %276, %275 %278 = fmul float %277, 0x3FA45F3060000000 %279 = fmul float %265, %237 %280 = fmul float %265, %239 %281 = fmul float %265, %241 %282 = call float @llvm.AMDGPU.cndlt(float %266, float 0.000000e+00, float %279) %283 = call float @llvm.AMDGPU.cndlt(float %266, float 0.000000e+00, float %280) %284 = call float @llvm.AMDGPU.cndlt(float %266, float 0.000000e+00, float %281) %285 = fmul float %255, %278 %286 = fmul float %257, %278 %287 = fmul float %259, %278 %288 = call float @llvm.AMDGPU.cndlt(float %273, float 0.000000e+00, float %285) %289 = call float @llvm.AMDGPU.cndlt(float %273, float 0.000000e+00, float %286) %290 = call float @llvm.AMDGPU.cndlt(float %273, float 0.000000e+00, float %287) %291 = fadd float %288, %282 %292 = fadd float %289, %283 %293 = fadd float %290, %284 %294 = fmul float %temp12.0, %291 %295 = fmul float %temp13.0, %292 %296 = fmul float %temp14.0, %293 %297 = fmul float %294, %42 %298 = fmul float %295, %43 %299 = fmul float %296, %44 %300 = call i32 @llvm.SI.packf16(float %297, float %298) %301 = bitcast i32 %300 to float %302 = call i32 @llvm.SI.packf16(float %299, float 0.000000e+00) %303 = bitcast i32 %302 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %301, float %303, float %301, float %303) ret void IF33: ; preds = %IF %304 = fmul float %83, %83 %305 = fmul float %84, %84 %306 = fadd float %305, %304 %307 = fmul float %85, %85 %308 = fadd float %306, %307 %309 = call float @fabs(float %308) %310 = call float @llvm.AMDGPU.rsq.clamped.f32(float %309) %311 = call float @llvm.minnum.f32(float %310, float 0x47EFFFFFE0000000) %312 = fdiv float 1.000000e+00, %311 %313 = fsub float %24, %312 %314 = fmul float %313, %25 %315 = call float @llvm.AMDIL.clamp.(float %314, float 0.000000e+00, float 1.000000e+00) %316 = fmul float %315, %315 %317 = call float @llvm.AMDGPU.lrp(float %316, float %219, float %159) %318 = call float @llvm.AMDGPU.lrp(float %316, float %219, float %159) %319 = call float @llvm.AMDGPU.lrp(float %316, float %219, float %159) br label %ENDIF32 ENDIF32: ; preds = %IF, %IF33 %temp24.0 = phi float [ %317, %IF33 ], [ %159, %IF ] %temp25.0 = phi float [ %318, %IF33 ], [ %159, %IF ] %temp26.0 = phi float [ %319, %IF33 ], [ %159, %IF ] %320 = fmul float %216, %temp24.0 %321 = fmul float %217, %temp25.0 %322 = fmul float %218, %temp26.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_movk_i32 s0, 0xf00 ; B0000F00 v_mov_b32_e32 v10, 0xb58637bd ; 7E1402FF B58637BD s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100 v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v4, v0, 0, 2, [m0] ; C8100800 v_interp_p2_f32 v4, [v4], v1, 0, 2, [m0] ; C8110801 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s3, s[8:11], s0 ; C2018800 s_buffer_load_dword s2, s[8:11], 0x13 ; C2010913 s_buffer_load_dword s0, s[8:11], 0x17 ; C2000917 s_buffer_load_dword s1, s[8:11], 0x2f ; C200892F s_buffer_load_dword s12, s[8:11], 0x38 ; C2060938 s_buffer_load_dword s13, s[8:11], 0x39 ; C2068939 s_buffer_load_dword s14, s[8:11], 0x3a ; C207093A v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 v_interp_p1_f32 v6, v0, 2, 2, [m0] ; C8180A00 v_interp_p2_f32 v6, [v6], v1, 2, 2, [m0] ; C8190A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v8, v0, 1, 3, [m0] ; C8200D00 v_interp_p2_f32 v8, [v8], v1, 1, 3, [m0] ; C8210D01 v_interp_p1_f32 v7, v0, 2, 3, [m0] ; C81C0E00 v_interp_p2_f32 v7, [v7], v1, 2, 3, [m0] ; C81D0E01 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx4 s[20:23], s[4:5], 0x10 ; C08A0510 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[16:19] ; F0800700 00861002 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[32:39], s[20:23] ; F0800100 00A80B0B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v11, s12, v11 ; 0616160C v_mul_f32_e32 v11, s13, v11 ; 1016160D v_add_f32_e64 v13, 0, v11 clamp ; D206080D 00021680 v_and_b32_e32 v11, 0x7fffffff, v13 ; 36161AFF 7FFFFFFF v_log_f32_e32 v14, v11 ; 7E1C4F0B v_mad_f32 v15, 2.0, v16, -1.0 ; D282000F 03CE20F4 v_mad_f32 v12, 2.0, v17, -1.0 ; D282000C 03CE22F4 v_mad_f32 v11, 2.0, v18, -1.0 ; D282000B 03CE24F4 v_add_f32_e32 v10, v13, v10 ; 0614150D v_mul_legacy_f32_e32 v13, s14, v14 ; 0E1A1C0E v_mul_f32_e32 v14, v9, v9 ; 101C1309 v_mad_f32 v14, v8, v8, v14 ; D282000E 043A1108 v_mad_f32 v14, v7, v7, v14 ; D282000E 043A0F07 v_rsq_clamp_f32_e32 v16, v14 ; 7E20590E v_mul_f32_e32 v14, v4, v4 ; 101C0904 v_mad_f32 v14, v5, v5, v14 ; D282000E 043A0B05 v_mad_f32 v14, v6, v6, v14 ; D282000E 043A0D06 v_rsq_clamp_f32_e32 v17, v14 ; 7E22590E v_mul_f32_e32 v14, v15, v15 ; 101C1F0F v_mad_f32 v14, v12, v12, v14 ; D282000E 043A190C v_mad_f32 v14, v11, v11, v14 ; D282000E 043A170B v_rsq_clamp_f32_e32 v18, v14 ; 7E24590E v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v10, v13, 0, vcc ; D200000A 01A9010D v_cmp_ne_i32_e64 s[12:13], 0, s3 ; D10A000C 00000680 v_mov_b32_e32 v13, v10 ; 7E1A030A v_mov_b32_e32 v14, v10 ; 7E1C030A s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_cbranch_execz BB0_4 ; BF880000 v_interp_p1_f32 v13, v0, 0, 4, [m0] ; C8341000 v_interp_p2_f32 v13, [v13], v1, 0, 4, [m0] ; C8351001 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v19, v0, 2, 4, [m0] ; C84C1200 v_interp_p2_f32 v19, [v19], v1, 2, 4, [m0] ; C84D1201 s_buffer_load_dword s3, s[8:11], 0x19 ; C2018919 s_buffer_load_dword s14, s[8:11], 0x1b ; C207091B s_buffer_load_dword s15, s[8:11], 0x1c ; C207891C s_buffer_load_dword s16, s[8:11], 0x1d ; C208091D s_buffer_load_dword s17, s[8:11], 0x4 ; C2088904 s_buffer_load_dword s18, s[8:11], 0x5 ; C2090905 s_buffer_load_dword s19, s[8:11], 0x6 ; C2098906 s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907 s_buffer_load_dword s21, s[8:11], 0x18 ; C20A8918 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 s_buffer_load_dword s22, s[8:11], 0x27 ; C20B0927 s_buffer_load_dword s23, s[8:11], 0x1f ; C20B891F s_buffer_load_dword s24, s[8:11], 0x20 ; C20C0920 s_buffer_load_dword s25, s[8:11], 0x21 ; C20C8921 s_buffer_load_dword s26, s[8:11], 0x23 ; C20D0923 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s15, v14 ; 10021C0F v_mul_f32_e32 v20, s16, v14 ; 10281C10 v_mad_f32 v1, s21, v13, v1 ; D2820001 04061A15 v_mad_f32 v20, s3, v13, v20 ; D2820014 04521A03 s_buffer_load_dword s3, s[8:11], 0x24 ; C2018924 s_buffer_load_dword s15, s[8:11], 0x25 ; C2078925 v_mul_f32_e32 v21, s23, v14 ; 102A1C17 v_mad_f32 v21, s14, v13, v21 ; D2820015 04561A0E v_mad_f32 v1, s24, v19, v1 ; D2820001 04062618 v_mad_f32 v21, s26, v19, v21 ; D2820015 0456261A v_mad_f32 v21, s22, v0, v21 ; D2820015 04560016 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_movk_i32 s14, 0xf04 ; B00E0F04 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_buffer_load_dword s14, s[8:11], s14 ; C207080E v_rcp_f32_e32 v21, v21 ; 7E2A5515 v_mad_f32 v20, s25, v19, v20 ; D2820014 04522619 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s3, v0, v1 ; D2820001 04060003 v_mad_f32 v0, s15, v0, v20 ; D2820000 0452000F v_mul_f32_e32 v1, v1, v21 ; 10022B01 v_mul_f32_e32 v0, v0, v21 ; 10002B00 v_mov_b32_e32 v20, s20 ; 7E280214 v_mad_f32 v20, s17, v1, v20 ; D2820014 04520211 v_mov_b32_e32 v1, s19 ; 7E020213 v_mad_f32 v21, s18, v0, v1 ; D2820015 04060012 image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[32:39], s[28:31] ; F0800F00 00E81414 v_cmp_ne_i32_e64 s[14:15], 0, s14 ; D10A000E 00001C80 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[14:15] ; BE8E240E s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s3, s[8:11], 0x0 ; C2018900 s_buffer_load_dword s16, s[8:11], 0x1 ; C2080901 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v13, v13 ; 10001B0D v_mad_f32 v0, v14, v14, v0 ; D2820000 04021D0E v_mad_f32 v0, v19, v19, v0 ; D2820000 04022713 v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s3, v0 ; 08000003 v_mul_f32_e32 v0, s16, v0 ; 10000010 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v1, v0, v0 ; 10020100 v_mad_f32 v0, -v0, v0, 1.0 ; D2820000 23CA0100 v_mul_f32_e32 v0, v10, v0 ; 1000010A v_mad_f32 v10, v1, v23, v0 ; D282000A 04022F01 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E v_mul_f32_e32 v14, v10, v20 ; 101C290A v_mul_f32_e32 v13, v10, v21 ; 101A2B0A v_mul_f32_e32 v10, v10, v22 ; 10142D0A s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mov_b32_e32 v0, s2 ; 7E000202 v_min_f32_e32 v16, 0x7f7fffff, v16 ; 1E2020FF 7F7FFFFF v_min_f32_e32 v17, 0x7f7fffff, v17 ; 1E2222FF 7F7FFFFF v_min_f32_e32 v18, 0x7f7fffff, v18 ; 1E2424FF 7F7FFFFF v_mul_f32_e32 v1, v18, v15 ; 10021F12 s_buffer_load_dword s21, s[8:11], 0x10 ; C20A8910 s_buffer_load_dword s17, s[8:11], 0x11 ; C2088911 s_buffer_load_dword s16, s[8:11], 0x12 ; C2080912 s_buffer_load_dword s13, s[8:11], 0x14 ; C2068914 s_buffer_load_dword s14, s[8:11], 0x15 ; C2070915 s_buffer_load_dword s15, s[8:11], 0x16 ; C2078916 s_buffer_load_dword s23, s[8:11], 0x28 ; C20B8928 s_buffer_load_dword s22, s[8:11], 0x29 ; C20B0929 s_buffer_load_dword s24, s[8:11], 0x2a ; C20C092A s_buffer_load_dword s18, s[8:11], 0x2c ; C209092C s_buffer_load_dword s19, s[8:11], 0x2d ; C209892D s_buffer_load_dword s20, s[8:11], 0x2e ; C20A092E s_buffer_load_dword s12, s[8:11], 0x30 ; C2060930 s_buffer_load_dword s2, s[8:11], 0x34 ; C2010934 s_buffer_load_dword s3, s[8:11], 0x35 ; C2018935 s_buffer_load_dword s8, s[8:11], 0x36 ; C2040936 v_mul_f32_e32 v12, v18, v12 ; 10181912 v_mul_f32_e32 v11, v18, v11 ; 10161712 v_mul_f32_e32 v15, v16, v9 ; 101E1310 v_mul_f32_e32 v15, v15, v1 ; 101E030F v_mul_f32_e32 v18, v16, v8 ; 10241110 v_mad_f32 v15, v12, v18, v15 ; D282000F 043E250C v_mul_f32_e32 v18, v16, v7 ; 10240F10 v_mad_f32 v15, v11, v18, v15 ; D282000F 043E250B v_mul_f32_e32 v18, v1, v15 ; 10241F01 v_mad_f32 v18, v15, v1, v18 ; D2820012 044A030F v_mad_f32 v9, -v9, v16, v18 ; D2820009 244A2109 s_load_dwordx4 s[28:31], s[4:5], 0x8 ; C08E0508 s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[44:51], s[6:7], 0x18 ; C0D60718 v_mul_f32_e32 v18, v12, v15 ; 10241F0C v_mad_f32 v18, v15, v12, v18 ; D2820012 044A190F v_mad_f32 v8, -v8, v16, v18 ; D2820008 244A2108 v_mul_f32_e32 v18, v11, v15 ; 10241F0B v_mad_f32 v15, v15, v11, v18 ; D282000F 044A170F v_mad_f32 v7, -v7, v16, v15 ; D2820007 243E2107 v_mul_f32_e32 v15, v17, v4 ; 101E0911 v_mul_f32_e32 v5, v17, v5 ; 100A0B11 v_mul_f32_e32 v4, v17, v6 ; 10080D11 v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mov_b32_e32 v16, s1 ; 7E200201 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[28:31] ; F0800700 00E91102 image_sample v2, 4, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[44:51], s[32:35] ; F0800400 010B0202 v_sub_f32_e64 v3, 1.0, s23 ; D2080003 00002EF2 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v3, v17, v3 ; 10060711 v_mad_f32 v3, v0, v3, s21 ; D2820003 00560700 v_mul_f32_e32 v1, v15, v1 ; 1002030F v_sub_f32_e64 v17, 1.0, s22 ; D2080011 00002CF2 v_sub_f32_e64 v20, 1.0, s24 ; D2080014 000030F2 v_mul_f32_e32 v17, v18, v17 ; 10222312 v_mul_f32_e32 v18, v19, v20 ; 10242913 v_mad_f32 v17, v17, v0, s17 ; D2820011 00460111 v_mad_f32 v0, v18, v0, s16 ; D2820000 00420112 v_mul_f32_e32 v18, s18, v16 ; 10242012 v_mul_f32_e32 v19, s19, v16 ; 10262013 v_mul_f32_e32 v16, s20, v16 ; 10202014 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, v2, v18 ; 10242502 v_mul_f32_e32 v19, v2, v19 ; 10262702 v_mul_f32_e32 v2, v2, v16 ; 10042102 v_mad_f32 v16, v6, v18, s13 ; D2820010 00362506 v_mad_f32 v18, v19, v6, s14 ; D2820012 003A0D13 v_mad_f32 v2, v2, v6, s15 ; D2820002 003E0D02 v_mad_f32 v1, v12, v5, v1 ; D2820001 04060B0C v_mul_f32_e32 v6, v15, v9 ; 100C130F v_mad_f32 v5, v8, v5, v6 ; D2820005 041A0B08 v_mad_f32 v1, v11, v4, v1 ; D2820001 0406090B v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mad_f32 v4, v7, v4, v5 ; D2820004 04160907 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_and_b32_e32 v5, 0x7fffffff, v4 ; 360A08FF 7FFFFFFF v_log_f32_e32 v5, v5 ; 7E0A4F05 v_mov_b32_e32 v6, 0xb58637bd ; 7E0C02FF B58637BD v_add_f32_e32 v7, v6, v1 ; 060E0306 v_add_f32_e32 v4, v6, v4 ; 06080906 v_mul_legacy_f32_e32 v5, s12, v5 ; 0E0A0A0C v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mov_b32_e32 v6, 0x41000000 ; 7E0C02FF 41000000 v_add_f32_e32 v6, s12, v6 ; 060C0C0C v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v5, 0x3d22f983, v5 ; 100A0AFF 3D22F983 v_mul_f32_e32 v3, v3, v1 ; 10060303 v_mul_f32_e32 v6, v17, v1 ; 100C0311 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v1, v3, 0, vcc ; D2000001 01A90103 v_cndmask_b32_e64 v3, v6, 0, vcc ; D2000003 01A90106 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v6, v5, v16 ; 100C2105 v_mul_f32_e32 v7, v5, v18 ; 100E2505 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v4, v6, 0, vcc ; D2000004 01A90106 v_cndmask_b32_e64 v5, v7, 0, vcc ; D2000005 01A90107 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_add_f32_e32 v1, v1, v4 ; 06020901 v_add_f32_e32 v3, v3, v5 ; 06060B03 v_add_f32_e32 v0, v0, v2 ; 06000500 v_mul_f32_e32 v1, v1, v14 ; 10021D01 v_mul_f32_e32 v2, v3, v13 ; 10041B03 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mul_f32_e32 v1, s2, v1 ; 10020202 v_mul_f32_e32 v2, s3, v2 ; 10040403 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 24 Code Size: 1364 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..7] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[2], IN[2] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[2], TEMP[0].xxxx 8: TEX TEMP[3], IN[1], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: TEX TEMP[3], IN[0], SAMP[4], 2D 18: ADD TEMP[1].w, TEMP[3].xxxx, CONST[15].xxxx 19: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[15].yyyy 20: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 21: POW TEMP[3].x, |TEMP[1].wwww|, CONST[15].zzzz 22: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].xxxx 23: UIF CONST[240].xxxx :45 24: MUL TEMP[5].xyz, CONST[7].xyww, IN[4].yyyy 25: MAD TEMP[5].xyz, CONST[6].xyww, IN[4].xxxx, TEMP[5] 26: MAD TEMP[5].xyz, CONST[8].xyww, IN[4].zzzz, TEMP[5] 27: MAD TEMP[5].xyz, CONST[9].xyww, IN[4].wwww, TEMP[5] 28: RCP TEMP[1].w, TEMP[5].zzzz 29: MUL TEMP[5].xy, TEMP[1].wwww, TEMP[5] 30: MAD TEMP[5].xy, TEMP[5], CONST[1], CONST[1].wzzw 31: TEX TEMP[5], TEMP[5], SAMP[0], 2D 32: UIF CONST[240].yyyy :0 33: DP3 TEMP[1].w, IN[4], IN[4] 34: RSQ TEMP[0], |TEMP[1].wwww| 35: MIN TEMP[1].w, IMM[1].wwww, TEMP[0] 36: RCP TEMP[1].w, TEMP[1].wwww 37: ADD TEMP[1].w, -TEMP[1].wwww, CONST[0].xxxx 38: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[0].yyyy 39: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 40: LRP TEMP[6].xyz, TEMP[1].wwww, TEMP[5].wwww, TEMP[3].zzzz 41: ELSE :43 42: MOV TEMP[6].xyz, TEMP[3].zzzz 43: ENDIF 44: MUL TEMP[3].xyz, TEMP[5], TEMP[6] 45: ENDIF 46: MOV TEMP[5].y, IMM[0].yyyy 47: ADD TEMP[5].xyz, -TEMP[5].yyyy, -CONST[10] 48: MUL TEMP[6].xyz, CONST[11], CONST[11].wwww 49: TEX TEMP[7], IN[1], SAMP[2], 2D 50: MUL TEMP[6].xyz, TEMP[6], TEMP[7] 51: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 52: MAD TEMP[5].xyz, TEMP[5], CONST[4].wwww, CONST[4] 53: MUL TEMP[6].xyz, CONST[12], CONST[12].wwww 54: TEX TEMP[7], IN[1], SAMP[3], 2D 55: MUL TEMP[6].xyz, TEMP[6], TEMP[7].xxxx 56: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 57: DP3_SAT TEMP[1].w, TEMP[4], TEMP[2] 58: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 59: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 60: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].zzzz 61: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[13].xxxx 62: MOV TEMP[1].x, CONST[13].xxxx 63: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 64: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 65: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 66: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[5] 67: CMP TEMP[2].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[2] 68: MUL TEMP[1].xzw, TEMP[6].xyyz, TEMP[1].xxxx 69: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[0].wwww, TEMP[1].xzww 70: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 71: MUL TEMP[1].xyz, TEMP[3], TEMP[1] 72: MUL OUT[0].xyz, TEMP[1], CONST[14] 73: MOV OUT[0].w, IMM[0].wwww 74: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %72 = bitcast <8 x i32> addrspace(2)* %71 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %75 = bitcast <4 x i32> addrspace(2)* %74 to <16 x i8> addrspace(2)* %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %91 = fmul float %84, %84 %92 = fmul float %85, %85 %93 = fadd float %92, %91 %94 = fmul float %86, %86 %95 = fadd float %93, %94 %96 = call float @llvm.AMDGPU.rsq.clamped.f32(float %95) %97 = call float @llvm.minnum.f32(float %96, float 0x47EFFFFFE0000000) %98 = fmul float %84, %97 %99 = fmul float %85, %97 %100 = fmul float %86, %97 %101 = fmul float %81, %81 %102 = fmul float %82, %82 %103 = fadd float %102, %101 %104 = fmul float %83, %83 %105 = fadd float %103, %104 %106 = call float @llvm.AMDGPU.rsq.clamped.f32(float %105) %107 = call float @llvm.minnum.f32(float %106, float 0x47EFFFFFE0000000) %108 = fmul float %81, %107 %109 = fmul float %82, %107 %110 = fmul float %83, %107 %111 = bitcast float %79 to i32 %112 = bitcast float %80 to i32 %113 = insertelement <2 x i32> undef, i32 %111, i32 0 %114 = insertelement <2 x i32> %113, i32 %112, i32 1 %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %114, <32 x i8> %55, <16 x i8> %58, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = extractelement <4 x float> %115, i32 1 %118 = extractelement <4 x float> %115, i32 2 %119 = fmul float %116, 2.000000e+00 %120 = fadd float %119, -1.000000e+00 %121 = fmul float %117, 2.000000e+00 %122 = fadd float %121, -1.000000e+00 %123 = fmul float %118, 2.000000e+00 %124 = fadd float %123, -1.000000e+00 %125 = fmul float %120, %120 %126 = fmul float %122, %122 %127 = fadd float %126, %125 %128 = fmul float %124, %124 %129 = fadd float %127, %128 %130 = call float @llvm.AMDGPU.rsq.clamped.f32(float %129) %131 = call float @llvm.minnum.f32(float %130, float 0x47EFFFFFE0000000) %132 = fmul float %120, %131 %133 = fmul float %122, %131 %134 = fmul float %124, %131 %135 = fmul float %132, %98 %136 = fmul float %133, %99 %137 = fadd float %136, %135 %138 = fmul float %134, %100 %139 = fadd float %137, %138 %140 = fmul float %139, %132 %141 = fmul float %139, %133 %142 = fmul float %139, %134 %143 = fmul float %140, 2.000000e+00 %144 = fsub float %143, %98 %145 = fmul float %141, 2.000000e+00 %146 = fsub float %145, %99 %147 = fmul float %142, 2.000000e+00 %148 = fsub float %147, %100 %149 = bitcast float %77 to i32 %150 = bitcast float %78 to i32 %151 = insertelement <2 x i32> undef, i32 %149, i32 0 %152 = insertelement <2 x i32> %151, i32 %150, i32 1 %153 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %152, <32 x i8> %73, <16 x i8> %76, i32 2) %154 = extractelement <4 x float> %153, i32 0 %155 = fadd float %154, %49 %156 = fmul float %155, %50 %157 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00) %158 = fadd float %157, 0xBEB0C6F7A0000000 %159 = call float @fabs(float %157) %160 = call float @llvm.pow.f32(float %159, float %51) %161 = call float @llvm.AMDGPU.cndlt(float %158, float 0.000000e+00, float %160) %162 = call float @llvm.AMDGPU.cndlt(float %158, float 0.000000e+00, float %160) %163 = call float @llvm.AMDGPU.cndlt(float %158, float 0.000000e+00, float %160) %164 = bitcast float %52 to i32 %165 = icmp eq i32 %164, 0 br i1 %165, label %ENDIF, label %IF IF: ; preds = %main_body %166 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %167 = load <16 x i8>, <16 x i8> addrspace(2)* %166, align 16, !tbaa !0 %168 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %169 = load <32 x i8>, <32 x i8> addrspace(2)* %168, align 32, !tbaa !0 %170 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %171 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %172 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %173 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %174 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %175 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %176 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %177 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %178 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %179 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %180 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %181 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %182 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %183 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %184 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %185 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %186 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %187 = fmul float %179, %88 %188 = fmul float %178, %88 %189 = fmul float %177, %88 %190 = fmul float %182, %87 %191 = fadd float %190, %187 %192 = fmul float %181, %87 %193 = fadd float %192, %188 %194 = fmul float %180, %87 %195 = fadd float %194, %189 %196 = fmul float %176, %89 %197 = fadd float %196, %191 %198 = fmul float %175, %89 %199 = fadd float %198, %193 %200 = fmul float %174, %89 %201 = fadd float %200, %195 %202 = fmul float %173, %90 %203 = fadd float %202, %197 %204 = fmul float %172, %90 %205 = fadd float %204, %199 %206 = fmul float %171, %90 %207 = fadd float %206, %201 %208 = fdiv float 1.000000e+00, %207 %209 = fmul float %208, %203 %210 = fmul float %208, %205 %211 = fmul float %209, %186 %212 = fadd float %211, %183 %213 = fmul float %210, %185 %214 = fadd float %213, %184 %215 = bitcast float %212 to i32 %216 = bitcast float %214 to i32 %217 = insertelement <2 x i32> undef, i32 %215, i32 0 %218 = insertelement <2 x i32> %217, i32 %216, i32 1 %219 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %218, <32 x i8> %169, <16 x i8> %167, i32 2) %220 = extractelement <4 x float> %219, i32 0 %221 = extractelement <4 x float> %219, i32 1 %222 = extractelement <4 x float> %219, i32 2 %223 = extractelement <4 x float> %219, i32 3 %224 = bitcast float %170 to i32 %225 = icmp eq i32 %224, 0 br i1 %225, label %ENDIF32, label %IF33 ENDIF: ; preds = %main_body, %ENDIF32 %temp12.0 = phi float [ %330, %ENDIF32 ], [ %161, %main_body ] %temp13.0 = phi float [ %331, %ENDIF32 ], [ %162, %main_body ] %temp14.0 = phi float [ %332, %ENDIF32 ], [ %163, %main_body ] %226 = fsub float 1.000000e+00, %34 %227 = fsub float 1.000000e+00, %35 %228 = fsub float 1.000000e+00, %36 %229 = fmul float %37, %40 %230 = fmul float %38, %40 %231 = fmul float %39, %40 %232 = bitcast float %79 to i32 %233 = bitcast float %80 to i32 %234 = insertelement <2 x i32> undef, i32 %232, i32 0 %235 = insertelement <2 x i32> %234, i32 %233, i32 1 %236 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %235, <32 x i8> %61, <16 x i8> %64, i32 2) %237 = extractelement <4 x float> %236, i32 0 %238 = extractelement <4 x float> %236, i32 1 %239 = extractelement <4 x float> %236, i32 2 %240 = fmul float %229, %237 %241 = fmul float %230, %238 %242 = fmul float %231, %239 %243 = fmul float %226, %240 %244 = fmul float %227, %241 %245 = fmul float %228, %242 %246 = fmul float %243, %29 %247 = fadd float %246, %26 %248 = fmul float %244, %29 %249 = fadd float %248, %27 %250 = fmul float %245, %29 %251 = fadd float %250, %28 %252 = fmul float %41, %44 %253 = fmul float %42, %44 %254 = fmul float %43, %44 %255 = bitcast float %79 to i32 %256 = bitcast float %80 to i32 %257 = insertelement <2 x i32> undef, i32 %255, i32 0 %258 = insertelement <2 x i32> %257, i32 %256, i32 1 %259 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %258, <32 x i8> %67, <16 x i8> %70, i32 2) %260 = extractelement <4 x float> %259, i32 0 %261 = fmul float %252, %260 %262 = fmul float %253, %260 %263 = fmul float %254, %260 %264 = fmul float %261, %33 %265 = fadd float %264, %30 %266 = fmul float %262, %33 %267 = fadd float %266, %31 %268 = fmul float %263, %33 %269 = fadd float %268, %32 %270 = fmul float %132, %108 %271 = fmul float %133, %109 %272 = fadd float %271, %270 %273 = fmul float %134, %110 %274 = fadd float %272, %273 %275 = call float @llvm.AMDIL.clamp.(float %274, float 0.000000e+00, float 1.000000e+00) %276 = fadd float %275, 0xBEB0C6F7A0000000 %277 = fmul float %144, %108 %278 = fmul float %146, %109 %279 = fadd float %278, %277 %280 = fmul float %148, %110 %281 = fadd float %279, %280 %282 = call float @llvm.AMDIL.clamp.(float %281, float 0.000000e+00, float 1.000000e+00) %283 = fadd float %282, 0xBEB0C6F7A0000000 %284 = call float @fabs(float %282) %285 = call float @llvm.pow.f32(float %284, float %45) %286 = fadd float %45, 8.000000e+00 %287 = fmul float %286, %285 %288 = fmul float %287, 0x3FA45F3060000000 %289 = fmul float %275, %247 %290 = fmul float %275, %249 %291 = fmul float %275, %251 %292 = call float @llvm.AMDGPU.cndlt(float %276, float 0.000000e+00, float %289) %293 = call float @llvm.AMDGPU.cndlt(float %276, float 0.000000e+00, float %290) %294 = call float @llvm.AMDGPU.cndlt(float %276, float 0.000000e+00, float %291) %295 = fmul float %265, %288 %296 = fmul float %267, %288 %297 = fmul float %269, %288 %298 = call float @llvm.AMDGPU.cndlt(float %283, float 0.000000e+00, float %295) %299 = call float @llvm.AMDGPU.cndlt(float %283, float 0.000000e+00, float %296) %300 = call float @llvm.AMDGPU.cndlt(float %283, float 0.000000e+00, float %297) %301 = fadd float %298, %292 %302 = fadd float %299, %293 %303 = fadd float %300, %294 %304 = fmul float %temp12.0, %301 %305 = fmul float %temp13.0, %302 %306 = fmul float %temp14.0, %303 %307 = fmul float %304, %46 %308 = fmul float %305, %47 %309 = fmul float %306, %48 %310 = call i32 @llvm.SI.packf16(float %307, float %308) %311 = bitcast i32 %310 to float %312 = call i32 @llvm.SI.packf16(float %309, float 0.000000e+00) %313 = bitcast i32 %312 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %311, float %313, float %311, float %313) ret void IF33: ; preds = %IF %314 = fmul float %87, %87 %315 = fmul float %88, %88 %316 = fadd float %315, %314 %317 = fmul float %89, %89 %318 = fadd float %316, %317 %319 = call float @fabs(float %318) %320 = call float @llvm.AMDGPU.rsq.clamped.f32(float %319) %321 = call float @llvm.minnum.f32(float %320, float 0x47EFFFFFE0000000) %322 = fdiv float 1.000000e+00, %321 %323 = fsub float %24, %322 %324 = fmul float %323, %25 %325 = call float @llvm.AMDIL.clamp.(float %324, float 0.000000e+00, float 1.000000e+00) %326 = fmul float %325, %325 %327 = call float @llvm.AMDGPU.lrp(float %326, float %223, float %163) %328 = call float @llvm.AMDGPU.lrp(float %326, float %223, float %163) %329 = call float @llvm.AMDGPU.lrp(float %326, float %223, float %163) br label %ENDIF32 ENDIF32: ; preds = %IF, %IF33 %temp24.0 = phi float [ %327, %IF33 ], [ %163, %IF ] %temp25.0 = phi float [ %328, %IF33 ], [ %163, %IF ] %temp26.0 = phi float [ %329, %IF33 ], [ %163, %IF ] %330 = fmul float %220, %temp24.0 %331 = fmul float %221, %temp25.0 %332 = fmul float %222, %temp26.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_movk_i32 s0, 0xf00 ; B0000F00 v_mov_b32_e32 v5, 0xb58637bd ; 7E0A02FF B58637BD s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100 v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v4, v0, 0, 2, [m0] ; C8100800 v_interp_p2_f32 v4, [v4], v1, 0, 2, [m0] ; C8110801 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[8:11], s0 ; C2060800 s_buffer_load_dword s3, s[8:11], 0x13 ; C2018913 s_buffer_load_dword s1, s[8:11], 0x17 ; C2008917 s_buffer_load_dword s2, s[8:11], 0x2f ; C201092F s_buffer_load_dword s0, s[8:11], 0x33 ; C2000933 s_buffer_load_dword s13, s[8:11], 0x3c ; C206893C v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx4 s[20:23], s[4:5], 0x10 ; C08A0510 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 s_buffer_load_dword s14, s[8:11], 0x3d ; C207093D v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v8, v0, 1, 3, [m0] ; C8200D00 v_interp_p2_f32 v8, [v8], v1, 1, 3, [m0] ; C8210D01 v_interp_p1_f32 v10, v0, 2, 3, [m0] ; C8280E00 v_interp_p2_f32 v10, [v10], v1, 2, 3, [m0] ; C8290E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[16:19] ; F0800700 00860F02 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[32:39], s[20:23] ; F0800100 00A80B0B s_buffer_load_dword s15, s[8:11], 0x3e ; C207893E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v11, s13, v11 ; 0616160D v_mul_f32_e32 v11, s14, v11 ; 1016160E v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_and_b32_e32 v12, 0x7fffffff, v11 ; 361816FF 7FFFFFFF v_log_f32_e32 v18, v12 ; 7E244F0C v_mad_f32 v14, 2.0, v15, -1.0 ; D282000E 03CE1EF4 v_mad_f32 v13, 2.0, v16, -1.0 ; D282000D 03CE20F4 v_mad_f32 v12, 2.0, v17, -1.0 ; D282000C 03CE22F4 v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v16, s15, v18 ; 0E20240F v_mul_f32_e32 v11, v9, v9 ; 10161309 v_mad_f32 v11, v8, v8, v11 ; D282000B 042E1108 v_mad_f32 v11, v10, v10, v11 ; D282000B 042E150A v_rsq_clamp_f32_e32 v15, v11 ; 7E1E590B v_mul_f32_e32 v11, v4, v4 ; 10160904 v_mad_f32 v11, v6, v6, v11 ; D282000B 042E0D06 v_mad_f32 v11, v7, v7, v11 ; D282000B 042E0F07 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v17, v14, v14 ; 10221D0E v_mad_f32 v17, v13, v13, v17 ; D2820011 04461B0D v_mad_f32 v17, v12, v12, v17 ; D2820011 0446190C v_rsq_clamp_f32_e32 v18, v17 ; 7E245911 v_exp_f32_e32 v16, v16 ; 7E204B10 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v5, v16, 0, vcc ; D2000005 01A90110 v_cmp_ne_i32_e64 s[12:13], 0, s12 ; D10A000C 00001880 v_mov_b32_e32 v16, v5 ; 7E200305 v_mov_b32_e32 v17, v5 ; 7E220305 s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_cbranch_execz BB0_4 ; BF880000 v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000 v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001 v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100 v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101 v_interp_p1_f32 v19, v0, 2, 4, [m0] ; C84C1200 v_interp_p2_f32 v19, [v19], v1, 2, 4, [m0] ; C84D1201 s_buffer_load_dword s14, s[8:11], 0x19 ; C2070919 s_buffer_load_dword s15, s[8:11], 0x1b ; C207891B s_buffer_load_dword s16, s[8:11], 0x1c ; C208091C s_buffer_load_dword s17, s[8:11], 0x1d ; C208891D s_buffer_load_dword s18, s[8:11], 0x4 ; C2090904 s_buffer_load_dword s19, s[8:11], 0x5 ; C2098905 s_buffer_load_dword s20, s[8:11], 0x6 ; C20A0906 s_buffer_load_dword s21, s[8:11], 0x7 ; C20A8907 s_buffer_load_dword s22, s[8:11], 0x18 ; C20B0918 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 s_buffer_load_dword s23, s[8:11], 0x27 ; C20B8927 s_buffer_load_dword s24, s[8:11], 0x1f ; C20C091F s_buffer_load_dword s25, s[8:11], 0x20 ; C20C8920 s_buffer_load_dword s26, s[8:11], 0x21 ; C20D0921 s_buffer_load_dword s27, s[8:11], 0x23 ; C20D8923 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v17 ; 10022210 v_mul_f32_e32 v20, s17, v17 ; 10282211 v_mad_f32 v1, s22, v16, v1 ; D2820001 04062016 v_mad_f32 v20, s14, v16, v20 ; D2820014 0452200E s_buffer_load_dword s14, s[8:11], 0x24 ; C2070924 s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925 v_mul_f32_e32 v21, s24, v17 ; 102A2218 v_mad_f32 v21, s15, v16, v21 ; D2820015 0456200F v_mad_f32 v1, s25, v19, v1 ; D2820001 04062619 v_mad_f32 v21, s27, v19, v21 ; D2820015 0456261B v_mad_f32 v21, s23, v0, v21 ; D2820015 04560017 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_movk_i32 s15, 0xf04 ; B00F0F04 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_buffer_load_dword s15, s[8:11], s15 ; C207880F v_rcp_f32_e32 v21, v21 ; 7E2A5515 v_mad_f32 v20, s26, v19, v20 ; D2820014 0452261A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s14, v0, v1 ; D2820001 0406000E v_mad_f32 v0, s16, v0, v20 ; D2820000 04520010 v_mul_f32_e32 v1, v1, v21 ; 10022B01 v_mul_f32_e32 v0, v0, v21 ; 10002B00 v_mov_b32_e32 v20, s21 ; 7E280215 v_mad_f32 v20, s18, v1, v20 ; D2820014 04520212 v_mov_b32_e32 v1, s20 ; 7E020214 v_mad_f32 v21, s19, v0, v1 ; D2820015 04060013 image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[32:39], s[28:31] ; F0800F00 00E81414 v_cmp_ne_i32_e64 s[14:15], 0, s15 ; D10A000E 00001E80 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[14:15] ; BE8E240E s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s16, s[8:11], 0x0 ; C2080900 s_buffer_load_dword s17, s[8:11], 0x1 ; C2088901 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v16, v16 ; 10002110 v_mad_f32 v0, v17, v17, v0 ; D2820000 04022311 v_mad_f32 v0, v19, v19, v0 ; D2820000 04022713 v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s16, v0 ; 08000010 v_mul_f32_e32 v0, s17, v0 ; 10000011 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v1, v0, v0 ; 10020100 v_mad_f32 v0, -v0, v0, 1.0 ; D2820000 23CA0100 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mad_f32 v5, v1, v23, v0 ; D2820005 04022F01 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E v_mul_f32_e32 v17, v5, v20 ; 10222905 v_mul_f32_e32 v16, v5, v21 ; 10202B05 v_mul_f32_e32 v5, v5, v22 ; 100A2D05 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mov_b32_e32 v0, s3 ; 7E000203 v_min_f32_e32 v1, 0x7f7fffff, v18 ; 1E0224FF 7F7FFFFF s_buffer_load_dword s19, s[8:11], 0x10 ; C2098910 s_buffer_load_dword s17, s[8:11], 0x11 ; C2088911 s_buffer_load_dword s18, s[8:11], 0x12 ; C2090912 s_buffer_load_dword s14, s[8:11], 0x14 ; C2070914 s_buffer_load_dword s15, s[8:11], 0x15 ; C2078915 s_buffer_load_dword s16, s[8:11], 0x16 ; C2080916 s_buffer_load_dword s26, s[8:11], 0x28 ; C20D0928 s_buffer_load_dword s20, s[8:11], 0x29 ; C20A0929 s_buffer_load_dword s21, s[8:11], 0x2a ; C20A892A s_buffer_load_dword s25, s[8:11], 0x2c ; C20C892C s_buffer_load_dword s27, s[8:11], 0x2d ; C20D892D s_buffer_load_dword s28, s[8:11], 0x2e ; C20E092E s_buffer_load_dword s22, s[8:11], 0x30 ; C20B0930 s_buffer_load_dword s23, s[8:11], 0x31 ; C20B8931 s_buffer_load_dword s24, s[8:11], 0x32 ; C20C0932 s_buffer_load_dword s13, s[8:11], 0x34 ; C2068934 s_buffer_load_dword s3, s[8:11], 0x38 ; C2018938 s_buffer_load_dword s12, s[8:11], 0x39 ; C2060939 s_buffer_load_dword s8, s[8:11], 0x3a ; C204093A v_mul_f32_e32 v14, v1, v14 ; 101C1D01 v_mul_f32_e32 v13, v1, v13 ; 101A1B01 v_mul_f32_e32 v12, v1, v12 ; 10181901 v_min_f32_e32 v18, 0x7f7fffff, v15 ; 1E241EFF 7F7FFFFF v_mul_f32_e32 v1, v18, v9 ; 10021312 v_mul_f32_e32 v1, v1, v14 ; 10021D01 v_mul_f32_e32 v15, v18, v8 ; 101E1112 v_mad_f32 v1, v13, v15, v1 ; D2820001 04061F0D v_mul_f32_e32 v15, v18, v10 ; 101E1512 v_mad_f32 v19, v12, v15, v1 ; D2820013 04061F0C v_mul_f32_e32 v1, v14, v19 ; 1002270E v_mad_f32 v1, v19, v14, v1 ; D2820001 04061D13 v_mad_f32 v15, -v9, v18, v1 ; D282000F 24062509 v_mov_b32_e32 v1, s1 ; 7E020201 v_mov_b32_e32 v9, s2 ; 7E120202 v_mul_f32_e32 v20, v13, v19 ; 1028270D s_load_dwordx4 s[32:35], s[4:5], 0x8 ; C0900508 s_load_dwordx4 s[36:39], s[4:5], 0xc ; C092050C s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718 v_mad_f32 v20, v19, v13, v20 ; D2820014 04521B13 v_mad_f32 v8, -v8, v18, v20 ; D2820008 24522508 v_mul_f32_e32 v20, v12, v19 ; 1028270C v_mad_f32 v19, v19, v12, v20 ; D2820013 04521913 v_mad_f32 v10, -v10, v18, v19 ; D282000A 244E250A v_min_f32_e32 v18, 0x7f7fffff, v11 ; 1E2416FF 7F7FFFFF v_mul_f32_e32 v11, v18, v4 ; 10160912 v_mul_f32_e32 v6, v18, v6 ; 100C0D12 v_mul_f32_e32 v7, v18, v7 ; 100E0F12 v_mov_b32_e32 v4, s0 ; 7E080200 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[32:35] ; F0800700 010A1202 image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[36:39] ; F0800100 012C0202 v_sub_f32_e64 v3, 1.0, s26 ; D2080003 000034F2 v_mul_f32_e32 v14, v11, v14 ; 101C1D0B v_mad_f32 v13, v13, v6, v14 ; D282000D 043A0D0D v_mad_f32 v12, v12, v7, v13 ; D282000C 04360F0C v_mul_f32_e32 v11, v11, v15 ; 10161F0B v_mad_f32 v6, v8, v6, v11 ; D2820006 042E0D08 v_mad_f32 v6, v10, v7, v6 ; D2820006 041A0F0A v_mul_f32_e32 v7, s25, v9 ; 100E1219 v_mul_f32_e32 v8, s27, v9 ; 1010121B v_mul_f32_e32 v9, s28, v9 ; 1012121C s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v7, v18, v7 ; 100E0F12 v_mul_f32_e32 v8, v19, v8 ; 10101113 v_mul_f32_e32 v9, v20, v9 ; 10121314 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_sub_f32_e64 v7, 1.0, s20 ; D2080007 000028F2 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_sub_f32_e64 v8, 1.0, s21 ; D2080008 00002AF2 v_mul_f32_e32 v8, v9, v8 ; 10101109 v_mad_f32 v3, v0, v3, s19 ; D2820003 004E0700 v_mad_f32 v7, v7, v0, s17 ; D2820007 00460107 v_mad_f32 v0, v8, v0, s18 ; D2820000 004A0108 v_mul_f32_e32 v8, s22, v4 ; 10100816 v_mul_f32_e32 v9, s23, v4 ; 10120817 v_mul_f32_e32 v4, s24, v4 ; 10080818 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v2, v8 ; 10101102 v_mul_f32_e32 v9, v2, v9 ; 10121302 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mad_f32 v4, v1, v8, s14 ; D2820004 003A1101 v_mad_f32 v8, v9, v1, s15 ; D2820008 003E0309 v_mad_f32 v1, v2, v1, s16 ; D2820001 00420302 v_add_f32_e64 v2, 0, v12 clamp ; D2060802 00021880 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_and_b32_e32 v9, 0x7fffffff, v6 ; 36120CFF 7FFFFFFF v_log_f32_e32 v9, v9 ; 7E124F09 v_mov_b32_e32 v10, 0xb58637bd ; 7E1402FF B58637BD v_add_f32_e32 v11, v10, v2 ; 0616050A v_add_f32_e32 v6, v10, v6 ; 060C0D0A v_mul_legacy_f32_e32 v9, s13, v9 ; 0E12120D v_exp_f32_e32 v9, v9 ; 7E124B09 v_mov_b32_e32 v10, 0x41000000 ; 7E1402FF 41000000 v_add_f32_e32 v10, s13, v10 ; 0614140D v_mul_f32_e32 v9, v9, v10 ; 10121509 v_mul_f32_e32 v9, 0x3d22f983, v9 ; 101212FF 3D22F983 v_mul_f32_e32 v3, v3, v2 ; 10060503 v_mul_f32_e32 v7, v7, v2 ; 100E0507 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v2, v3, 0, vcc ; D2000002 01A90103 v_cndmask_b32_e64 v3, v7, 0, vcc ; D2000003 01A90107 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v7, v9, v8 ; 100E1109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v4, v4, 0, vcc ; D2000004 01A90104 v_cndmask_b32_e64 v6, v7, 0, vcc ; D2000006 01A90107 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_add_f32_e32 v2, v2, v4 ; 06040902 v_add_f32_e32 v3, v3, v6 ; 06060D03 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mul_f32_e32 v1, v2, v17 ; 10022302 v_mul_f32_e32 v2, v3, v16 ; 10042103 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mul_f32_e32 v1, s3, v1 ; 10020203 v_mul_f32_e32 v2, s12, v2 ; 1004040C v_mul_f32_e32 v0, s8, v0 ; 10000008 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 24 Code Size: 1412 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[1], PERSPECTIVE DCL IN[4], TEXCOORD[4], PERSPECTIVE DCL IN[5], TEXCOORD[6], PERSPECTIVE DCL IN[6], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..10] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 15.0000, 0.9151, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[1], IN[1] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[1], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[0], IN[0] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[0], TEMP[0].xxxx 8: MUL TEMP[3].xyz, TEMP[1].zxyw, TEMP[2].yzxw 9: MAD TEMP[3].xyz, TEMP[1].yzxw, TEMP[2].zxyw, -TEMP[3] 10: MUL TEMP[3].xyz, TEMP[3], IN[1].wwww 11: DP3 TEMP[0].x, IN[5], IN[5] 12: RSQ TEMP[0].x, TEMP[0].xxxx 13: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 14: MUL TEMP[4].xyz, IN[5], TEMP[0].xxxx 15: DP3 TEMP[0].x, IN[4], IN[4] 16: RSQ TEMP[0].x, TEMP[0].xxxx 17: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 18: MUL TEMP[5].xyz, IN[4], TEMP[0].xxxx 19: TEX TEMP[6], IN[3], SAMP[1], 2D 20: MAD TEMP[6].xyz, TEMP[6], IMM[0].xxxx, IMM[0].yyyy 21: DP3 TEMP[0].x, TEMP[6], TEMP[6] 22: RSQ TEMP[0].x, TEMP[0].xxxx 23: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 24: MUL TEMP[7].xyz, TEMP[6], TEMP[0].xxxx 25: DP3 TEMP[1].w, TEMP[7], TEMP[4] 26: MUL TEMP[6].xyz, TEMP[1].wwww, TEMP[7] 27: MAD TEMP[4].xyz, TEMP[6], IMM[0].xxxx, -TEMP[4] 28: TEX TEMP[6], IN[2], SAMP[5], 2D 29: ADD TEMP[1].w, TEMP[6].xxxx, CONST[13].xxxx 30: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[13].yyyy 31: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 32: POW TEMP[3].w, |TEMP[1].wwww|, CONST[13].zzzz 33: CMP TEMP[6].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].wwww 34: UIF CONST[240].xxxx :56 35: MUL TEMP[8].xyz, CONST[7].xyww, IN[6].yyyy 36: MAD TEMP[8].xyz, CONST[6].xyww, IN[6].xxxx, TEMP[8] 37: MAD TEMP[8].xyz, CONST[8].xyww, IN[6].zzzz, TEMP[8] 38: MAD TEMP[8].xyz, CONST[9].xyww, IN[6].wwww, TEMP[8] 39: RCP TEMP[1].w, TEMP[8].zzzz 40: MUL TEMP[8].xy, TEMP[1].wwww, TEMP[8] 41: MAD TEMP[8].xy, TEMP[8], CONST[1], CONST[1].wzzw 42: TEX TEMP[8], TEMP[8], SAMP[0], 2D 43: UIF CONST[240].yyyy :0 44: DP3 TEMP[1].w, IN[6], IN[6] 45: RSQ TEMP[0], |TEMP[1].wwww| 46: MIN TEMP[1].w, IMM[1].wwww, TEMP[0] 47: RCP TEMP[1].w, TEMP[1].wwww 48: ADD TEMP[1].w, -TEMP[1].wwww, CONST[0].xxxx 49: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[0].yyyy 50: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 51: LRP TEMP[9].xyz, TEMP[1].wwww, TEMP[8].wwww, TEMP[6].zzzz 52: ELSE :54 53: MOV TEMP[9].xyz, TEMP[6].zzzz 54: ENDIF 55: MUL TEMP[6].xyz, TEMP[8], TEMP[9] 56: ENDIF 57: MOV TEMP[8].y, IMM[0].yyyy 58: ADD TEMP[8].xyz, -TEMP[8].yyyy, -CONST[10] 59: TEX TEMP[9], IN[3], SAMP[2], 2D 60: MUL TEMP[10].xyz, CONST[11], CONST[11].wwww 61: DP3 TEMP[2].x, TEMP[2], TEMP[4] 62: DP3 TEMP[2].y, TEMP[3], TEMP[4] 63: DP3 TEMP[2].z, TEMP[1], TEMP[4] 64: TEX TEMP[1], TEMP[2], SAMP[4], CUBE 65: MUL TEMP[1].xyz, TEMP[1], TEMP[10] 66: MUL_SAT TEMP[1].xyz, TEMP[1], TEMP[9] 67: TEX TEMP[2], IN[3], SAMP[3], 2D 68: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 69: MUL TEMP[1].xyz, TEMP[8], TEMP[1] 70: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 71: DP3_SAT TEMP[1].w, TEMP[7], TEMP[5] 72: ADD TEMP[2].x, TEMP[1].wwww, IMM[0].zzzz 73: DP3_SAT TEMP[2].y, TEMP[4], TEMP[5] 74: ADD TEMP[2].z, TEMP[2].yyyy, IMM[0].zzzz 75: POW TEMP[3].x, |TEMP[2].yyyy|, IMM[1].xxxx 76: MUL TEMP[2].y, TEMP[3].xxxx, IMM[1].yyyy 77: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 78: CMP TEMP[1].xyz, TEMP[2].xxxx, IMM[0].wwww, TEMP[1] 79: MUL TEMP[2].xyw, TEMP[2].yyyy, CONST[5].xyzz 80: CMP TEMP[2].xyz, TEMP[2].zzzz, IMM[0].wwww, TEMP[2].xyww 81: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 82: MUL TEMP[1].xyz, TEMP[6], TEMP[1] 83: MUL OUT[0].xyz, TEMP[1], CONST[12] 84: MOV OUT[0].w, IMM[0].wwww 85: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %48 = bitcast <8 x i32> addrspace(2)* %47 to <32 x i8> addrspace(2)* %49 = load <32 x i8>, <32 x i8> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %51 = bitcast <4 x i32> addrspace(2)* %50 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %72 = bitcast <8 x i32> addrspace(2)* %71 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %75 = bitcast <4 x i32> addrspace(2)* %74 to <16 x i8> addrspace(2)* %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %98 = fmul float %80, %80 %99 = fmul float %81, %81 %100 = fadd float %99, %98 %101 = fmul float %82, %82 %102 = fadd float %100, %101 %103 = call float @llvm.AMDGPU.rsq.clamped.f32(float %102) %104 = call float @llvm.minnum.f32(float %103, float 0x47EFFFFFE0000000) %105 = fmul float %80, %104 %106 = fmul float %81, %104 %107 = fmul float %82, %104 %108 = fmul float %77, %77 %109 = fmul float %78, %78 %110 = fadd float %109, %108 %111 = fmul float %79, %79 %112 = fadd float %110, %111 %113 = call float @llvm.AMDGPU.rsq.clamped.f32(float %112) %114 = call float @llvm.minnum.f32(float %113, float 0x47EFFFFFE0000000) %115 = fmul float %77, %114 %116 = fmul float %78, %114 %117 = fmul float %79, %114 %118 = fmul float %107, %116 %119 = fmul float %105, %117 %120 = fmul float %106, %115 %121 = fmul float %106, %117 %122 = fsub float %121, %118 %123 = fmul float %107, %115 %124 = fsub float %123, %119 %125 = fmul float %105, %116 %126 = fsub float %125, %120 %127 = fmul float %122, %83 %128 = fmul float %124, %83 %129 = fmul float %126, %83 %130 = fmul float %91, %91 %131 = fmul float %92, %92 %132 = fadd float %131, %130 %133 = fmul float %93, %93 %134 = fadd float %132, %133 %135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134) %136 = call float @llvm.minnum.f32(float %135, float 0x47EFFFFFE0000000) %137 = fmul float %91, %136 %138 = fmul float %92, %136 %139 = fmul float %93, %136 %140 = fmul float %88, %88 %141 = fmul float %89, %89 %142 = fadd float %141, %140 %143 = fmul float %90, %90 %144 = fadd float %142, %143 %145 = call float @llvm.AMDGPU.rsq.clamped.f32(float %144) %146 = call float @llvm.minnum.f32(float %145, float 0x47EFFFFFE0000000) %147 = fmul float %88, %146 %148 = fmul float %89, %146 %149 = fmul float %90, %146 %150 = bitcast float %86 to i32 %151 = bitcast float %87 to i32 %152 = insertelement <2 x i32> undef, i32 %150, i32 0 %153 = insertelement <2 x i32> %152, i32 %151, i32 1 %154 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %153, <32 x i8> %49, <16 x i8> %52, i32 2) %155 = extractelement <4 x float> %154, i32 0 %156 = extractelement <4 x float> %154, i32 1 %157 = extractelement <4 x float> %154, i32 2 %158 = fmul float %155, 2.000000e+00 %159 = fadd float %158, -1.000000e+00 %160 = fmul float %156, 2.000000e+00 %161 = fadd float %160, -1.000000e+00 %162 = fmul float %157, 2.000000e+00 %163 = fadd float %162, -1.000000e+00 %164 = fmul float %159, %159 %165 = fmul float %161, %161 %166 = fadd float %165, %164 %167 = fmul float %163, %163 %168 = fadd float %166, %167 %169 = call float @llvm.AMDGPU.rsq.clamped.f32(float %168) %170 = call float @llvm.minnum.f32(float %169, float 0x47EFFFFFE0000000) %171 = fmul float %159, %170 %172 = fmul float %161, %170 %173 = fmul float %163, %170 %174 = fmul float %171, %137 %175 = fmul float %172, %138 %176 = fadd float %175, %174 %177 = fmul float %173, %139 %178 = fadd float %176, %177 %179 = fmul float %178, %171 %180 = fmul float %178, %172 %181 = fmul float %178, %173 %182 = fmul float %179, 2.000000e+00 %183 = fsub float %182, %137 %184 = fmul float %180, 2.000000e+00 %185 = fsub float %184, %138 %186 = fmul float %181, 2.000000e+00 %187 = fsub float %186, %139 %188 = bitcast float %84 to i32 %189 = bitcast float %85 to i32 %190 = insertelement <2 x i32> undef, i32 %188, i32 0 %191 = insertelement <2 x i32> %190, i32 %189, i32 1 %192 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %191, <32 x i8> %73, <16 x i8> %76, i32 2) %193 = extractelement <4 x float> %192, i32 0 %194 = fadd float %193, %43 %195 = fmul float %194, %44 %196 = call float @llvm.AMDIL.clamp.(float %195, float 0.000000e+00, float 1.000000e+00) %197 = fadd float %196, 0xBEB0C6F7A0000000 %198 = call float @fabs(float %196) %199 = call float @llvm.pow.f32(float %198, float %45) %200 = call float @llvm.AMDGPU.cndlt(float %197, float 0.000000e+00, float %199) %201 = call float @llvm.AMDGPU.cndlt(float %197, float 0.000000e+00, float %199) %202 = call float @llvm.AMDGPU.cndlt(float %197, float 0.000000e+00, float %199) %203 = bitcast float %46 to i32 %204 = icmp eq i32 %203, 0 br i1 %204, label %ENDIF, label %IF IF: ; preds = %main_body %205 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %206 = load <16 x i8>, <16 x i8> addrspace(2)* %205, align 16, !tbaa !0 %207 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %208 = load <32 x i8>, <32 x i8> addrspace(2)* %207, align 32, !tbaa !0 %209 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %210 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %211 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %212 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %213 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %214 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %215 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %216 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %217 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %218 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %219 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %220 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %221 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %222 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %223 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %224 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %225 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %226 = fmul float %218, %95 %227 = fmul float %217, %95 %228 = fmul float %216, %95 %229 = fmul float %221, %94 %230 = fadd float %229, %226 %231 = fmul float %220, %94 %232 = fadd float %231, %227 %233 = fmul float %219, %94 %234 = fadd float %233, %228 %235 = fmul float %215, %96 %236 = fadd float %235, %230 %237 = fmul float %214, %96 %238 = fadd float %237, %232 %239 = fmul float %213, %96 %240 = fadd float %239, %234 %241 = fmul float %212, %97 %242 = fadd float %241, %236 %243 = fmul float %211, %97 %244 = fadd float %243, %238 %245 = fmul float %210, %97 %246 = fadd float %245, %240 %247 = fdiv float 1.000000e+00, %246 %248 = fmul float %247, %242 %249 = fmul float %247, %244 %250 = fmul float %248, %225 %251 = fadd float %250, %222 %252 = fmul float %249, %224 %253 = fadd float %252, %223 %254 = bitcast float %251 to i32 %255 = bitcast float %253 to i32 %256 = insertelement <2 x i32> undef, i32 %254, i32 0 %257 = insertelement <2 x i32> %256, i32 %255, i32 1 %258 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %257, <32 x i8> %208, <16 x i8> %206, i32 2) %259 = extractelement <4 x float> %258, i32 0 %260 = extractelement <4 x float> %258, i32 1 %261 = extractelement <4 x float> %258, i32 2 %262 = extractelement <4 x float> %258, i32 3 %263 = bitcast float %209 to i32 %264 = icmp eq i32 %263, 0 br i1 %264, label %ENDIF44, label %IF45 ENDIF: ; preds = %main_body, %ENDIF44 %temp24.0 = phi float [ %406, %ENDIF44 ], [ %200, %main_body ] %temp25.0 = phi float [ %407, %ENDIF44 ], [ %201, %main_body ] %temp26.0 = phi float [ %408, %ENDIF44 ], [ %202, %main_body ] %265 = fsub float 1.000000e+00, %33 %266 = fsub float 1.000000e+00, %34 %267 = fsub float 1.000000e+00, %35 %268 = bitcast float %86 to i32 %269 = bitcast float %87 to i32 %270 = insertelement <2 x i32> undef, i32 %268, i32 0 %271 = insertelement <2 x i32> %270, i32 %269, i32 1 %272 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %271, <32 x i8> %55, <16 x i8> %58, i32 2) %273 = extractelement <4 x float> %272, i32 0 %274 = extractelement <4 x float> %272, i32 1 %275 = extractelement <4 x float> %272, i32 2 %276 = fmul float %36, %39 %277 = fmul float %37, %39 %278 = fmul float %38, %39 %279 = fmul float %115, %183 %280 = fmul float %116, %185 %281 = fadd float %280, %279 %282 = fmul float %117, %187 %283 = fadd float %281, %282 %284 = fmul float %127, %183 %285 = fmul float %128, %185 %286 = fadd float %285, %284 %287 = fmul float %129, %187 %288 = fadd float %286, %287 %289 = fmul float %105, %183 %290 = fmul float %106, %185 %291 = fadd float %290, %289 %292 = fmul float %107, %187 %293 = fadd float %291, %292 %294 = insertelement <4 x float> undef, float %283, i32 0 %295 = insertelement <4 x float> %294, float %288, i32 1 %296 = insertelement <4 x float> %295, float %293, i32 2 %297 = insertelement <4 x float> %296, float %197, i32 3 %298 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %297) %299 = extractelement <4 x float> %298, i32 0 %300 = extractelement <4 x float> %298, i32 1 %301 = extractelement <4 x float> %298, i32 2 %302 = extractelement <4 x float> %298, i32 3 %303 = call float @fabs(float %301) %304 = fdiv float 1.000000e+00, %303 %305 = fmul float %299, %304 %306 = fadd float %305, 1.500000e+00 %307 = fmul float %300, %304 %308 = fadd float %307, 1.500000e+00 %309 = bitcast float %308 to i32 %310 = bitcast float %306 to i32 %311 = bitcast float %302 to i32 %312 = insertelement <4 x i32> undef, i32 %309, i32 0 %313 = insertelement <4 x i32> %312, i32 %310, i32 1 %314 = insertelement <4 x i32> %313, i32 %311, i32 2 %315 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %314, <32 x i8> %67, <16 x i8> %70, i32 4) %316 = extractelement <4 x float> %315, i32 0 %317 = extractelement <4 x float> %315, i32 1 %318 = extractelement <4 x float> %315, i32 2 %319 = fmul float %316, %276 %320 = fmul float %317, %277 %321 = fmul float %318, %278 %322 = fmul float %319, %273 %323 = fmul float %320, %274 %324 = fmul float %321, %275 %325 = call float @llvm.AMDIL.clamp.(float %322, float 0.000000e+00, float 1.000000e+00) %326 = call float @llvm.AMDIL.clamp.(float %323, float 0.000000e+00, float 1.000000e+00) %327 = call float @llvm.AMDIL.clamp.(float %324, float 0.000000e+00, float 1.000000e+00) %328 = bitcast float %86 to i32 %329 = bitcast float %87 to i32 %330 = insertelement <2 x i32> undef, i32 %328, i32 0 %331 = insertelement <2 x i32> %330, i32 %329, i32 1 %332 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %331, <32 x i8> %61, <16 x i8> %64, i32 2) %333 = extractelement <4 x float> %332, i32 0 %334 = extractelement <4 x float> %332, i32 1 %335 = extractelement <4 x float> %332, i32 2 %336 = fadd float %325, %333 %337 = fadd float %326, %334 %338 = fadd float %327, %335 %339 = fmul float %265, %336 %340 = fmul float %266, %337 %341 = fmul float %267, %338 %342 = fmul float %339, %29 %343 = fadd float %342, %26 %344 = fmul float %340, %29 %345 = fadd float %344, %27 %346 = fmul float %341, %29 %347 = fadd float %346, %28 %348 = fmul float %171, %147 %349 = fmul float %172, %148 %350 = fadd float %349, %348 %351 = fmul float %173, %149 %352 = fadd float %350, %351 %353 = call float @llvm.AMDIL.clamp.(float %352, float 0.000000e+00, float 1.000000e+00) %354 = fadd float %353, 0xBEB0C6F7A0000000 %355 = fmul float %183, %147 %356 = fmul float %185, %148 %357 = fadd float %356, %355 %358 = fmul float %187, %149 %359 = fadd float %357, %358 %360 = call float @llvm.AMDIL.clamp.(float %359, float 0.000000e+00, float 1.000000e+00) %361 = fadd float %360, 0xBEB0C6F7A0000000 %362 = call float @fabs(float %360) %363 = call float @llvm.pow.f32(float %362, float 1.500000e+01) %364 = fmul float %363, 0x3FED48D5A0000000 %365 = fmul float %353, %343 %366 = fmul float %353, %345 %367 = fmul float %353, %347 %368 = call float @llvm.AMDGPU.cndlt(float %354, float 0.000000e+00, float %365) %369 = call float @llvm.AMDGPU.cndlt(float %354, float 0.000000e+00, float %366) %370 = call float @llvm.AMDGPU.cndlt(float %354, float 0.000000e+00, float %367) %371 = fmul float %364, %30 %372 = fmul float %364, %31 %373 = fmul float %364, %32 %374 = call float @llvm.AMDGPU.cndlt(float %361, float 0.000000e+00, float %371) %375 = call float @llvm.AMDGPU.cndlt(float %361, float 0.000000e+00, float %372) %376 = call float @llvm.AMDGPU.cndlt(float %361, float 0.000000e+00, float %373) %377 = fadd float %368, %374 %378 = fadd float %369, %375 %379 = fadd float %370, %376 %380 = fmul float %temp24.0, %377 %381 = fmul float %temp25.0, %378 %382 = fmul float %temp26.0, %379 %383 = fmul float %380, %40 %384 = fmul float %381, %41 %385 = fmul float %382, %42 %386 = call i32 @llvm.SI.packf16(float %383, float %384) %387 = bitcast i32 %386 to float %388 = call i32 @llvm.SI.packf16(float %385, float 0.000000e+00) %389 = bitcast i32 %388 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %387, float %389, float %387, float %389) ret void IF45: ; preds = %IF %390 = fmul float %94, %94 %391 = fmul float %95, %95 %392 = fadd float %391, %390 %393 = fmul float %96, %96 %394 = fadd float %392, %393 %395 = call float @fabs(float %394) %396 = call float @llvm.AMDGPU.rsq.clamped.f32(float %395) %397 = call float @llvm.minnum.f32(float %396, float 0x47EFFFFFE0000000) %398 = fdiv float 1.000000e+00, %397 %399 = fsub float %24, %398 %400 = fmul float %399, %25 %401 = call float @llvm.AMDIL.clamp.(float %400, float 0.000000e+00, float 1.000000e+00) %402 = fmul float %401, %401 %403 = call float @llvm.AMDGPU.lrp(float %402, float %262, float %202) %404 = call float @llvm.AMDGPU.lrp(float %402, float %262, float %202) %405 = call float @llvm.AMDGPU.lrp(float %402, float %262, float %202) br label %ENDIF44 ENDIF44: ; preds = %IF, %IF45 %temp36.0 = phi float [ %403, %IF45 ], [ %202, %IF ] %temp37.0 = phi float [ %404, %IF45 ], [ %202, %IF ] %temp38.0 = phi float [ %405, %IF45 ], [ %202, %IF ] %406 = fmul float %259, %temp36.0 %407 = fmul float %260, %temp37.0 %408 = fmul float %261, %temp38.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200 v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 v_interp_p1_f32 v15, v0, 0, 1, [m0] ; C83C0400 v_interp_p2_f32 v15, [v15], v1, 0, 1, [m0] ; C83D0401 v_interp_p1_f32 v16, v0, 1, 1, [m0] ; C8400500 v_interp_p2_f32 v16, [v16], v1, 1, 1, [m0] ; C8410501 v_interp_p1_f32 v17, v0, 2, 1, [m0] ; C8440600 v_interp_p2_f32 v17, [v17], v1, 2, 1, [m0] ; C8450601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v18, v0, 0, 2, [m0] ; C8480800 v_interp_p2_f32 v18, [v18], v1, 0, 2, [m0] ; C8490801 v_interp_p1_f32 v19, v0, 1, 2, [m0] ; C84C0900 v_interp_p2_f32 v19, [v19], v1, 1, 2, [m0] ; C84D0901 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v9, v0, 0, 4, [m0] ; C8241000 v_interp_p2_f32 v9, [v9], v1, 0, 4, [m0] ; C8251001 v_interp_p1_f32 v10, v0, 1, 4, [m0] ; C8281100 v_interp_p2_f32 v10, [v10], v1, 1, 4, [m0] ; C8291101 v_interp_p1_f32 v11, v0, 2, 4, [m0] ; C82C1200 v_interp_p2_f32 v11, [v11], v1, 2, 4, [m0] ; C82D1201 v_interp_p1_f32 v12, v0, 0, 5, [m0] ; C8301400 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514 s_load_dwordx8 s[24:31], s[6:7], 0x28 ; C0CC0728 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s32, s[0:3], 0x34 ; C2100134 s_buffer_load_dword s33, s[0:3], 0x35 ; C2108135 v_interp_p2_f32 v12, [v12], v1, 0, 5, [m0] ; C8311401 v_interp_p1_f32 v13, v0, 1, 5, [m0] ; C8341500 v_interp_p2_f32 v13, [v13], v1, 1, 5, [m0] ; C8351501 v_interp_p1_f32 v14, v0, 2, 5, [m0] ; C8381600 v_interp_p2_f32 v14, [v14], v1, 2, 5, [m0] ; C8391601 image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00431402 s_buffer_load_dword s8, s[0:3], 0x36 ; C2040136 image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[24:31], s[20:23] ; F0800100 00A60512 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v5, s32, v5 ; 060A0A20 v_mul_f32_e32 v5, s33, v5 ; 100A0A21 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_and_b32_e32 v18, 0x7fffffff, v5 ; 36240AFF 7FFFFFFF v_log_f32_e32 v18, v18 ; 7E244F12 v_mad_f32 v24, 2.0, v20, -1.0 ; D2820018 03CE28F4 v_mad_f32 v23, 2.0, v21, -1.0 ; D2820017 03CE2AF4 v_mad_f32 v22, 2.0, v22, -1.0 ; D2820016 03CE2CF4 v_mul_legacy_f32_e32 v19, s8, v18 ; 0E262408 s_movk_i32 s8, 0xf00 ; B0080F00 s_buffer_load_dword s9, s[0:3], s8 ; C2048008 v_mul_f32_e32 v18, v15, v15 ; 10241F0F v_mad_f32 v18, v16, v16, v18 ; D2820012 044A2110 v_mad_f32 v18, v17, v17, v18 ; D2820012 044A2311 v_rsq_clamp_f32_e32 v25, v18 ; 7E325912 v_mul_f32_e32 v18, v4, v4 ; 10240904 v_mad_f32 v18, v6, v6, v18 ; D2820012 044A0D06 v_mad_f32 v18, v7, v7, v18 ; D2820012 044A0F07 v_rsq_clamp_f32_e32 v21, v18 ; 7E2A5912 v_mul_f32_e32 v18, v12, v12 ; 1024190C v_mad_f32 v18, v13, v13, v18 ; D2820012 044A1B0D v_mad_f32 v18, v14, v14, v18 ; D2820012 044A1D0E v_rsq_clamp_f32_e32 v26, v18 ; 7E345912 v_mul_f32_e32 v18, v9, v9 ; 10241309 v_mad_f32 v18, v10, v10, v18 ; D2820012 044A150A v_mad_f32 v18, v11, v11, v18 ; D2820012 044A170B v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s13, s[0:3], 0x2f ; C206812F v_mul_f32_e32 v20, v24, v24 ; 10283118 v_mad_f32 v20, v23, v23, v20 ; D2820014 04522F17 v_mad_f32 v20, v22, v22, v20 ; D2820014 04522D16 v_rsq_clamp_f32_e32 v27, v20 ; 7E365914 v_exp_f32_e32 v20, v19 ; 7E284B13 v_mov_b32_e32 v19, 0xb58637bd ; 7E2602FF B58637BD v_add_f32_e32 v19, v5, v19 ; 06262705 v_cmp_gt_f32_e32 vcc, 0, v19 ; 7C082680 v_cndmask_b32_e64 v5, v20, 0, vcc ; D2000005 01A90114 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[10:11], 0, s9 ; D10A000A 00001280 v_mov_b32_e32 v19, v5 ; 7E260305 v_mov_b32_e32 v20, v5 ; 7E280305 s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_cbranch_execz BB0_4 ; BF880000 v_interp_p1_f32 v19, v0, 0, 6, [m0] ; C84C1800 v_interp_p2_f32 v19, [v19], v1, 0, 6, [m0] ; C84D1801 v_interp_p1_f32 v20, v0, 1, 6, [m0] ; C8501900 v_interp_p2_f32 v20, [v20], v1, 1, 6, [m0] ; C8511901 v_interp_p1_f32 v28, v0, 2, 6, [m0] ; C8701A00 v_interp_p2_f32 v28, [v28], v1, 2, 6, [m0] ; C8711A01 s_buffer_load_dword s9, s[0:3], 0x19 ; C2048119 s_buffer_load_dword s12, s[0:3], 0x1b ; C206011B s_buffer_load_dword s14, s[0:3], 0x1c ; C207011C s_buffer_load_dword s15, s[0:3], 0x1d ; C207811D s_buffer_load_dword s16, s[0:3], 0x4 ; C2080104 s_buffer_load_dword s17, s[0:3], 0x5 ; C2088105 s_buffer_load_dword s18, s[0:3], 0x6 ; C2090106 s_buffer_load_dword s19, s[0:3], 0x7 ; C2098107 s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118 v_interp_p1_f32 v0, v0, 3, 6, [m0] ; C8001B00 v_interp_p2_f32 v0, [v0], v1, 3, 6, [m0] ; C8011B01 s_buffer_load_dword s21, s[0:3], 0x27 ; C20A8127 s_buffer_load_dword s22, s[0:3], 0x1f ; C20B011F s_buffer_load_dword s23, s[0:3], 0x20 ; C20B8120 s_buffer_load_dword s24, s[0:3], 0x21 ; C20C0121 s_buffer_load_dword s25, s[0:3], 0x23 ; C20C8123 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s14, v20 ; 1002280E v_mul_f32_e32 v29, s15, v20 ; 103A280F v_mad_f32 v1, s20, v19, v1 ; D2820001 04062614 v_mad_f32 v29, s9, v19, v29 ; D282001D 04762609 s_buffer_load_dword s9, s[0:3], 0x24 ; C2048124 s_buffer_load_dword s14, s[0:3], 0x25 ; C2070125 v_mul_f32_e32 v30, s22, v20 ; 103C2816 v_mad_f32 v30, s12, v19, v30 ; D282001E 047A260C v_mad_f32 v1, s23, v28, v1 ; D2820001 04063817 v_mad_f32 v30, s25, v28, v30 ; D282001E 047A3819 v_mad_f32 v30, s21, v0, v30 ; D282001E 047A0015 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_movk_i32 s12, 0xf04 ; B00C0F04 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_buffer_load_dword s12, s[0:3], s12 ; C206000C v_rcp_f32_e32 v30, v30 ; 7E3C551E v_mad_f32 v29, s24, v28, v29 ; D282001D 04763818 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s9, v0, v1 ; D2820001 04060009 v_mad_f32 v0, s14, v0, v29 ; D2820000 0476000E v_mul_f32_e32 v1, v1, v30 ; 10023D01 v_mul_f32_e32 v0, v0, v30 ; 10003D00 v_mov_b32_e32 v29, s19 ; 7E3A0213 v_mad_f32 v29, s16, v1, v29 ; D282001D 04760210 v_mov_b32_e32 v1, s18 ; 7E020212 v_mad_f32 v30, s17, v0, v1 ; D282001E 04060011 image_sample v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[28:35], s[20:23] ; F0800F00 00A71D1D v_cmp_ne_i32_e64 s[14:15], 0, s12 ; D10A000E 00001880 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[14:15] ; BE8E240E s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100 s_buffer_load_dword s12, s[0:3], 0x1 ; C2060101 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v19, v19 ; 10002713 v_mad_f32 v0, v20, v20, v0 ; D2820000 04022914 v_mad_f32 v0, v28, v28, v0 ; D2820000 0402391C v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s9, v0 ; 08000009 v_mul_f32_e32 v0, s12, v0 ; 1000000C v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v1, v0, v0 ; 10020100 v_mad_f32 v0, -v0, v0, 1.0 ; D2820000 23CA0100 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mad_f32 v5, v1, v32, v0 ; D2820005 04024101 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E v_mul_f32_e32 v20, v5, v29 ; 10283B05 v_mul_f32_e32 v19, v5, v30 ; 10263D05 v_mul_f32_e32 v5, v5, v31 ; 100A3F05 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mov_b32_e32 v0, s8 ; 7E000208 v_min_f32_e32 v1, 0x7f7fffff, v25 ; 1E0232FF 7F7FFFFF v_mul_f32_e32 v15, v1, v15 ; 101E1F01 v_mul_f32_e32 v25, v1, v16 ; 10322101 v_mul_f32_e32 v28, v1, v17 ; 10382301 v_min_f32_e32 v16, 0x7f7fffff, v26 ; 1E2034FF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v27 ; 1E0236FF 7F7FFFFF s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s10, s[0:3], 0x14 ; C2050114 s_buffer_load_dword s11, s[0:3], 0x15 ; C2058115 s_buffer_load_dword s12, s[0:3], 0x16 ; C2060116 s_buffer_load_dword s20, s[0:3], 0x28 ; C20A0128 s_buffer_load_dword s21, s[0:3], 0x29 ; C20A8129 s_buffer_load_dword s22, s[0:3], 0x2a ; C20B012A s_buffer_load_dword s17, s[0:3], 0x2c ; C208812C s_buffer_load_dword s18, s[0:3], 0x2d ; C209012D s_buffer_load_dword s19, s[0:3], 0x2e ; C209812E s_buffer_load_dword s8, s[0:3], 0x30 ; C2040130 s_buffer_load_dword s9, s[0:3], 0x31 ; C2048131 s_buffer_load_dword s0, s[0:3], 0x32 ; C2000132 v_mul_f32_e32 v24, v1, v24 ; 10303101 v_mul_f32_e32 v23, v1, v23 ; 102E2F01 v_mul_f32_e32 v1, v1, v22 ; 10022D01 v_mul_f32_e32 v17, v16, v12 ; 10221910 v_mul_f32_e32 v17, v17, v24 ; 10223111 v_mul_f32_e32 v22, v16, v13 ; 102C1B10 v_mad_f32 v17, v23, v22, v17 ; D2820011 04462D17 v_mul_f32_e32 v22, v16, v14 ; 102C1D10 v_mad_f32 v17, v1, v22, v17 ; D2820011 04462D01 v_mul_f32_e32 v22, v24, v17 ; 102C2318 v_mad_f32 v22, v17, v24, v22 ; D2820016 045A3111 v_mad_f32 v12, -v12, v16, v22 ; D282000C 245A210C v_mul_f32_e32 v22, v23, v17 ; 102C2317 v_mad_f32 v22, v17, v23, v22 ; D2820016 045A2F11 v_mad_f32 v13, -v13, v16, v22 ; D282000D 245A210D v_mul_f32_e32 v22, v1, v17 ; 102C2301 v_mad_f32 v17, v17, v1, v22 ; D2820011 045A0311 v_mad_f32 v14, -v14, v16, v17 ; D282000E 2446210E v_min_f32_e32 v21, 0x7f7fffff, v21 ; 1E2A2AFF 7F7FFFFF v_mul_f32_e32 v16, v21, v4 ; 10200915 v_mul_f32_e32 v17, v21, v6 ; 10220D15 v_mul_f32_e32 v22, v21, v7 ; 102C0F15 v_mov_b32_e32 v21, s13 ; 7E2A020D v_mul_f32_e32 v4, v17, v28 ; 10083911 v_mad_f32 v4, v25, v22, -v4 ; D2820004 84122D19 v_mul_f32_e32 v6, v22, v15 ; 100C1F16 v_mad_f32 v6, v28, v16, -v6 ; D2820006 841A211C s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx8 s[48:55], s[6:7], 0x10 ; C0D80710 s_load_dwordx8 s[28:35], s[6:7], 0x18 ; C0CE0718 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 s_load_dwordx4 s[4:7], s[4:5], 0x10 ; C0820510 v_mul_f32_e32 v7, v16, v25 ; 100E3310 v_mad_f32 v7, v15, v17, -v7 ; D2820007 841E230F v_mul_f32_e32 v26, v8, v4 ; 10340908 v_mul_f32_e32 v27, v8, v6 ; 10360D08 v_mul_f32_e32 v8, v8, v7 ; 10100F08 v_min_f32_e32 v4, 0x7f7fffff, v18 ; 1E0824FF 7F7FFFFF v_mul_f32_e32 v7, v4, v9 ; 100E1304 v_mul_f32_e32 v6, v4, v10 ; 100C1504 v_mul_f32_e32 v4, v4, v11 ; 10081704 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[44:47] ; F0800700 016C0902 v_mul_f32_e32 v16, v12, v16 ; 1020210C v_mad_f32 v16, v17, v13, v16 ; D2820010 04421B11 v_mad_f32 v16, v22, v14, v16 ; D2820010 04421D16 v_mul_f32_e32 v17, v12, v26 ; 1022350C v_mad_f32 v17, v27, v13, v17 ; D2820011 04461B1B v_mad_f32 v17, v8, v14, v17 ; D2820011 04461D08 v_mul_f32_e32 v8, v12, v15 ; 10101F0C v_mad_f32 v8, v25, v13, v8 ; D2820008 04221B19 v_mad_f32 v18, v28, v14, v8 ; D2820012 04221D1C v_mul_f32_e32 v8, v7, v24 ; 10103107 v_mad_f32 v8, v23, v6, v8 ; D2820008 04220D17 v_sub_f32_e64 v15, 1.0, s20 ; D208000F 000028F2 v_sub_f32_e64 v22, 1.0, s21 ; D2080016 00002AF2 v_sub_f32_e64 v23, 1.0, s22 ; D2080017 00002CF2 v_cubeid_f32 v27, v16, v17, v18 ; D288001B 044A2310 v_cubema_f32 v26, v16, v17, v18 ; D28E001A 044A2310 v_cubesc_f32 v25, v16, v17, v18 ; D28A0019 044A2310 v_cubetc_f32 v24, v16, v17, v18 ; D28C0018 044A2310 v_rcp_f32_e64 v16, |v26| ; D3540110 0000011A v_mov_b32_e32 v17, 0x3fc00000 ; 7E2202FF 3FC00000 v_mad_f32 v26, v24, v16, v17 ; D282001A 04462118 v_mad_f32 v25, v25, v16, v17 ; D2820019 04462119 image_sample v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[36:43], s[4:7] ; F0800700 00291019 v_mul_f32_e32 v24, s17, v21 ; 10302A11 v_mul_f32_e32 v25, s18, v21 ; 10322A12 v_mul_f32_e32 v21, s19, v21 ; 102A2A13 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v16, v24, v16 ; 10202118 v_mul_f32_e32 v17, v25, v17 ; 10222319 v_mul_f32_e32 v18, v21, v18 ; 10242515 v_mul_f32_e32 v9, v9, v16 ; 10122109 v_mul_f32_e32 v10, v10, v17 ; 1014230A v_mul_f32_e32 v11, v11, v18 ; 1016250B image_sample v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[24:27] ; F0800700 00C71002 v_add_f32_e64 v2, 0, v9 clamp ; D2060802 00021280 v_add_f32_e64 v3, 0, v10 clamp ; D2060803 00021480 v_add_f32_e64 v9, 0, v11 clamp ; D2060809 00021680 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v2, v16, v2 ; 06040510 v_add_f32_e32 v3, v17, v3 ; 06060711 v_add_f32_e32 v9, v18, v9 ; 06121312 v_mul_f32_e32 v2, v2, v15 ; 10041F02 v_mul_f32_e32 v3, v3, v22 ; 10062D03 v_mul_f32_e32 v9, v9, v23 ; 10122F09 v_mad_f32 v2, v0, v2, s16 ; D2820002 00420500 v_mad_f32 v3, v3, v0, s15 ; D2820003 003E0103 v_mad_f32 v0, v9, v0, s14 ; D2820000 003A0109 v_mad_f32 v1, v1, v4, v8 ; D2820001 04220901 v_mul_f32_e32 v7, v7, v12 ; 100E1907 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mad_f32 v6, v13, v6, v7 ; D2820006 041E0D0D v_mad_f32 v4, v14, v4, v6 ; D2820004 041A090E v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_and_b32_e32 v6, 0x7fffffff, v4 ; 360C08FF 7FFFFFFF v_log_f32_e32 v6, v6 ; 7E0C4F06 v_mov_b32_e32 v7, 0xb58637bd ; 7E0E02FF B58637BD v_add_f32_e32 v8, v7, v1 ; 06100307 v_add_f32_e32 v4, v7, v4 ; 06080907 v_mul_legacy_f32_e32 v6, 0x41700000, v6 ; 0E0C0CFF 41700000 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v6, 0x3f6a46ad, v6 ; 100C0CFF 3F6A46AD v_mul_f32_e32 v2, v2, v1 ; 10040302 v_mul_f32_e32 v3, v3, v1 ; 10060303 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_cndmask_b32_e64 v1, v2, 0, vcc ; D2000001 01A90102 v_cndmask_b32_e64 v2, v3, 0, vcc ; D2000002 01A90103 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v3, s10, v6 ; 10060C0A v_mul_f32_e32 v7, s11, v6 ; 100E0C0B v_mul_f32_e32 v6, s12, v6 ; 100C0C0C v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_cndmask_b32_e64 v4, v7, 0, vcc ; D2000004 01A90107 v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_add_f32_e32 v1, v3, v1 ; 06020303 v_add_f32_e32 v2, v4, v2 ; 06040504 v_add_f32_e32 v0, v6, v0 ; 06000106 v_mul_f32_e32 v1, v1, v20 ; 10022901 v_mul_f32_e32 v2, v2, v19 ; 10042702 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mul_f32_e32 v1, s8, v1 ; 10020208 v_mul_f32_e32 v2, s9, v2 ; 10040409 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 36 Code Size: 1696 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[1], PERSPECTIVE DCL IN[4], TEXCOORD[4], PERSPECTIVE DCL IN[5], TEXCOORD[5], PERSPECTIVE DCL IN[6], TEXCOORD[6], PERSPECTIVE DCL IN[7], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..9] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[1], IN[1] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[1], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[0], IN[0] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[0], TEMP[0].xxxx 8: MUL TEMP[3].xyz, TEMP[1].zxyw, TEMP[2].yzxw 9: MAD TEMP[3].xyz, TEMP[1].yzxw, TEMP[2].zxyw, -TEMP[3] 10: MUL TEMP[3].xyz, TEMP[3], IN[1].wwww 11: DP3 TEMP[0].x, IN[6], IN[6] 12: RSQ TEMP[0].x, TEMP[0].xxxx 13: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 14: MUL TEMP[4].xyz, IN[6], TEMP[0].xxxx 15: DP3 TEMP[0].x, IN[4], IN[4] 16: RSQ TEMP[0].x, TEMP[0].xxxx 17: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 18: MUL TEMP[5].xyz, IN[4], TEMP[0].xxxx 19: TEX TEMP[6], IN[3], SAMP[2], 2D 20: MAD TEMP[6].xyz, TEMP[6], IMM[0].xxxx, IMM[0].yyyy 21: DP3 TEMP[0].x, TEMP[6], TEMP[6] 22: RSQ TEMP[0].x, TEMP[0].xxxx 23: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 24: MUL TEMP[7].xyz, TEMP[6], TEMP[0].xxxx 25: DP3 TEMP[1].w, TEMP[7], TEMP[4] 26: MUL TEMP[6].xyz, TEMP[1].wwww, TEMP[7] 27: MAD TEMP[4].xyz, TEMP[6], IMM[0].xxxx, -TEMP[4] 28: TEX TEMP[6], IN[2], SAMP[6], 2D 29: ADD TEMP[1].w, TEMP[6].xxxx, CONST[9].xxxx 30: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[9].yyyy 31: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 32: POW TEMP[3].w, |TEMP[1].wwww|, CONST[9].zzzz 33: CMP TEMP[6].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].wwww 34: DP3 TEMP[1].w, IN[5], IN[5] 35: RSQ TEMP[0], |TEMP[1].wwww| 36: MIN TEMP[2].w, IMM[1].wwww, TEMP[0] 37: MUL TEMP[8].xyz, TEMP[2].wwww, IN[5] 38: DP3 TEMP[2].w, TEMP[8], -CONST[10] 39: ADD TEMP[2].w, TEMP[2].wwww, -CONST[11].xxxx 40: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[11].yyyy 41: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 42: ADD TEMP[1].w, -TEMP[1].wwww, -IMM[0].yyyy 43: MUL TEMP[3].w, TEMP[6].zzzz, TEMP[1].wwww 44: MUL TEMP[8], TEMP[2].wwww, TEMP[3].wwww 45: MOV TEMP[9], TEMP[8] 46: KILL_IF TEMP[9] 47: UIF CONST[240].xxxx :53 48: RCP TEMP[3].w, IN[7].wwww 49: MUL TEMP[8].xy, TEMP[3].wwww, IN[7] 50: MAD TEMP[8].xy, TEMP[8], CONST[1], CONST[1].wzzw 51: TEX TEMP[9], TEMP[8], SAMP[0], 2D 52: MUL TEMP[6].xyz, TEMP[6].zzzz, TEMP[9] 53: ENDIF 54: MOV TEMP[8].y, IMM[0].yyyy 55: ADD TEMP[8].xyz, -TEMP[8].yyyy, -CONST[0] 56: TEX TEMP[9], IN[3], SAMP[3], 2D 57: MUL TEMP[9].yzw, CONST[6].xxyz, CONST[6].wwww 58: DP3 TEMP[2].x, TEMP[2], TEMP[4] 59: DP3 TEMP[2].y, TEMP[3], TEMP[4] 60: DP3 TEMP[2].z, TEMP[1], TEMP[4] 61: TEX TEMP[3], TEMP[2], SAMP[1], CUBE 62: MUL TEMP[1].xyz, TEMP[3], TEMP[9].yzww 63: TEX TEMP[3], IN[3], SAMP[4], 2D 64: MAD TEMP[1].xyz, TEMP[9].xxxx, TEMP[1], TEMP[3] 65: MUL TEMP[1].xyz, TEMP[8], TEMP[1] 66: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 67: MAX TEMP[2].x, TEMP[1].wwww, IMM[0].wwww 68: ADD TEMP[1].w, TEMP[2].xxxx, IMM[0].zzzz 69: POW TEMP[3].x, |TEMP[2].xxxx|, CONST[8].wwww 70: CMP TEMP[1].w, TEMP[1].wwww, IMM[0].wwww, TEMP[3].xxxx 71: TEX TEMP[3], IN[3], SAMP[5], 2D 72: MAD TEMP[2].xyz, TEMP[3], CONST[5].wwww, CONST[5] 73: DP3_SAT TEMP[3].x, TEMP[7], TEMP[5] 74: DP3_SAT TEMP[3].z, TEMP[4], TEMP[5] 75: ADD TEMP[3].yw, TEMP[3].xxzz, IMM[0].zzzz 76: POW TEMP[4].x, |TEMP[3].zzzz|, CONST[7].xxxx 77: MOV TEMP[5].x, CONST[7].xxxx 78: ADD TEMP[3].z, TEMP[5].xxxx, IMM[1].xxxx 79: MUL TEMP[3].z, TEMP[3].zzzz, TEMP[4].xxxx 80: MUL TEMP[3].z, TEMP[3].zzzz, IMM[1].yyyy 81: MUL TEMP[1].xyz, TEMP[1], TEMP[3].xxxx 82: CMP TEMP[1].xyz, TEMP[3].yyyy, IMM[0].wwww, TEMP[1] 83: MUL TEMP[2].xyz, TEMP[2], TEMP[3].zzzz 84: CMP TEMP[2].xyz, TEMP[3].wwww, IMM[0].wwww, TEMP[2] 85: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 86: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 87: MUL TEMP[1].xyz, TEMP[6], TEMP[1] 88: MUL TEMP[1].xyz, TEMP[1], CONST[8] 89: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 90: CMP OUT[0].xyz, -TEMP[8].wwww, TEMP[1], IMM[0].wwww 91: MOV OUT[0].w, IMM[0].wwww 92: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %76 = bitcast <8 x i32> addrspace(2)* %75 to <32 x i8> addrspace(2)* %77 = load <32 x i8>, <32 x i8> addrspace(2)* %76, align 32, !tbaa !0 %78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %79 = bitcast <4 x i32> addrspace(2)* %78 to <16 x i8> addrspace(2)* %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %82 = bitcast <8 x i32> addrspace(2)* %81 to <32 x i8> addrspace(2)* %83 = load <32 x i8>, <32 x i8> addrspace(2)* %82, align 32, !tbaa !0 %84 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %85 = bitcast <4 x i32> addrspace(2)* %84 to <16 x i8> addrspace(2)* %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)* %89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)* %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %113 = fmul float %96, %96 %114 = fmul float %97, %97 %115 = fadd float %114, %113 %116 = fmul float %98, %98 %117 = fadd float %115, %116 %118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117) %119 = call float @llvm.minnum.f32(float %118, float 0x47EFFFFFE0000000) %120 = fmul float %96, %119 %121 = fmul float %97, %119 %122 = fmul float %98, %119 %123 = fmul float %93, %93 %124 = fmul float %94, %94 %125 = fadd float %124, %123 %126 = fmul float %95, %95 %127 = fadd float %125, %126 %128 = call float @llvm.AMDGPU.rsq.clamped.f32(float %127) %129 = call float @llvm.minnum.f32(float %128, float 0x47EFFFFFE0000000) %130 = fmul float %93, %129 %131 = fmul float %94, %129 %132 = fmul float %95, %129 %133 = fmul float %122, %131 %134 = fmul float %120, %132 %135 = fmul float %121, %130 %136 = fmul float %121, %132 %137 = fsub float %136, %133 %138 = fmul float %122, %130 %139 = fsub float %138, %134 %140 = fmul float %120, %131 %141 = fsub float %140, %135 %142 = fmul float %137, %99 %143 = fmul float %139, %99 %144 = fmul float %141, %99 %145 = fmul float %110, %110 %146 = fmul float %111, %111 %147 = fadd float %146, %145 %148 = fmul float %112, %112 %149 = fadd float %147, %148 %150 = call float @llvm.AMDGPU.rsq.clamped.f32(float %149) %151 = call float @llvm.minnum.f32(float %150, float 0x47EFFFFFE0000000) %152 = fmul float %110, %151 %153 = fmul float %111, %151 %154 = fmul float %112, %151 %155 = fmul float %104, %104 %156 = fmul float %105, %105 %157 = fadd float %156, %155 %158 = fmul float %106, %106 %159 = fadd float %157, %158 %160 = call float @llvm.AMDGPU.rsq.clamped.f32(float %159) %161 = call float @llvm.minnum.f32(float %160, float 0x47EFFFFFE0000000) %162 = fmul float %104, %161 %163 = fmul float %105, %161 %164 = fmul float %106, %161 %165 = bitcast float %102 to i32 %166 = bitcast float %103 to i32 %167 = insertelement <2 x i32> undef, i32 %165, i32 0 %168 = insertelement <2 x i32> %167, i32 %166, i32 1 %169 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %168, <32 x i8> %65, <16 x i8> %68, i32 2) %170 = extractelement <4 x float> %169, i32 0 %171 = extractelement <4 x float> %169, i32 1 %172 = extractelement <4 x float> %169, i32 2 %173 = fmul float %170, 2.000000e+00 %174 = fadd float %173, -1.000000e+00 %175 = fmul float %171, 2.000000e+00 %176 = fadd float %175, -1.000000e+00 %177 = fmul float %172, 2.000000e+00 %178 = fadd float %177, -1.000000e+00 %179 = fmul float %174, %174 %180 = fmul float %176, %176 %181 = fadd float %180, %179 %182 = fmul float %178, %178 %183 = fadd float %181, %182 %184 = call float @llvm.AMDGPU.rsq.clamped.f32(float %183) %185 = call float @llvm.minnum.f32(float %184, float 0x47EFFFFFE0000000) %186 = fmul float %174, %185 %187 = fmul float %176, %185 %188 = fmul float %178, %185 %189 = fmul float %186, %152 %190 = fmul float %187, %153 %191 = fadd float %190, %189 %192 = fmul float %188, %154 %193 = fadd float %191, %192 %194 = fmul float %193, %186 %195 = fmul float %193, %187 %196 = fmul float %193, %188 %197 = fmul float %194, 2.000000e+00 %198 = fsub float %197, %152 %199 = fmul float %195, 2.000000e+00 %200 = fsub float %199, %153 %201 = fmul float %196, 2.000000e+00 %202 = fsub float %201, %154 %203 = bitcast float %100 to i32 %204 = bitcast float %101 to i32 %205 = insertelement <2 x i32> undef, i32 %203, i32 0 %206 = insertelement <2 x i32> %205, i32 %204, i32 1 %207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %206, <32 x i8> %89, <16 x i8> %92, i32 2) %208 = extractelement <4 x float> %207, i32 0 %209 = fadd float %208, %44 %210 = fmul float %209, %45 %211 = call float @llvm.AMDIL.clamp.(float %210, float 0.000000e+00, float 1.000000e+00) %212 = fadd float %211, 0xBEB0C6F7A0000000 %213 = call float @fabs(float %211) %214 = call float @llvm.pow.f32(float %213, float %46) %215 = call float @llvm.AMDGPU.cndlt(float %212, float 0.000000e+00, float %214) %216 = call float @llvm.AMDGPU.cndlt(float %212, float 0.000000e+00, float %214) %217 = call float @llvm.AMDGPU.cndlt(float %212, float 0.000000e+00, float %214) %218 = fmul float %107, %107 %219 = fmul float %108, %108 %220 = fadd float %219, %218 %221 = fmul float %109, %109 %222 = fadd float %220, %221 %223 = call float @fabs(float %222) %224 = call float @llvm.AMDGPU.rsq.clamped.f32(float %223) %225 = call float @llvm.minnum.f32(float %224, float 0x47EFFFFFE0000000) %226 = fmul float %225, %107 %227 = fmul float %225, %108 %228 = fmul float %225, %109 %229 = fmul float %47, %226 %230 = fsub float -0.000000e+00, %229 %231 = fmul float %48, %227 %232 = fsub float %230, %231 %233 = fmul float %49, %228 %234 = fsub float %232, %233 %235 = fsub float %234, %50 %236 = fmul float %235, %51 %237 = call float @llvm.AMDIL.clamp.(float %236, float 0.000000e+00, float 1.000000e+00) %238 = fmul float %237, %237 %239 = fsub float 1.000000e+00, %222 %240 = fmul float %217, %239 %241 = fmul float %238, %240 %242 = fmul float %238, %240 %243 = fmul float %238, %240 %244 = fmul float %238, %240 %245 = fcmp olt float %241, 0.000000e+00 %246 = fcmp olt float %242, 0.000000e+00 %247 = fcmp olt float %243, 0.000000e+00 %248 = fcmp olt float %244, 0.000000e+00 %249 = or i1 %248, %247 %250 = or i1 %249, %246 %251 = or i1 %250, %245 %252 = select i1 %251, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %252) %253 = bitcast float %52 to i32 %254 = icmp eq i32 %253, 0 br i1 %254, label %ENDIF, label %IF IF: ; preds = %main_body %255 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %5, <2 x i32> %7) %256 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %257 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %258 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %259 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %260 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %261 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %262 = fdiv float 1.000000e+00, %255 %263 = fmul float %262, %257 %264 = fmul float %262, %256 %265 = fmul float %263, %261 %266 = fadd float %265, %258 %267 = fmul float %264, %260 %268 = fadd float %267, %259 %269 = bitcast float %266 to i32 %270 = bitcast float %268 to i32 %271 = insertelement <2 x i32> undef, i32 %269, i32 0 %272 = insertelement <2 x i32> %271, i32 %270, i32 1 %273 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %272, <32 x i8> %54, <16 x i8> %56, i32 2) %274 = extractelement <4 x float> %273, i32 0 %275 = extractelement <4 x float> %273, i32 1 %276 = extractelement <4 x float> %273, i32 2 %277 = fmul float %217, %274 %278 = fmul float %217, %275 %279 = fmul float %217, %276 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp24.0 = phi float [ %277, %IF ], [ %215, %main_body ] %temp25.0 = phi float [ %278, %IF ], [ %216, %main_body ] %temp26.0 = phi float [ %279, %IF ], [ %217, %main_body ] %280 = fsub float 1.000000e+00, %24 %281 = fsub float 1.000000e+00, %25 %282 = fsub float 1.000000e+00, %26 %283 = bitcast float %102 to i32 %284 = bitcast float %103 to i32 %285 = insertelement <2 x i32> undef, i32 %283, i32 0 %286 = insertelement <2 x i32> %285, i32 %284, i32 1 %287 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %286, <32 x i8> %71, <16 x i8> %74, i32 2) %288 = extractelement <4 x float> %287, i32 0 %289 = fmul float %35, %38 %290 = fmul float %36, %38 %291 = fmul float %37, %38 %292 = fmul float %130, %198 %293 = fmul float %131, %200 %294 = fadd float %293, %292 %295 = fmul float %132, %202 %296 = fadd float %294, %295 %297 = fmul float %142, %198 %298 = fmul float %143, %200 %299 = fadd float %298, %297 %300 = fmul float %144, %202 %301 = fadd float %299, %300 %302 = fmul float %120, %198 %303 = fmul float %121, %200 %304 = fadd float %303, %302 %305 = fmul float %122, %202 %306 = fadd float %304, %305 %307 = insertelement <4 x float> undef, float %296, i32 0 %308 = insertelement <4 x float> %307, float %301, i32 1 %309 = insertelement <4 x float> %308, float %306, i32 2 %310 = insertelement <4 x float> %309, float %238, i32 3 %311 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %310) %312 = extractelement <4 x float> %311, i32 0 %313 = extractelement <4 x float> %311, i32 1 %314 = extractelement <4 x float> %311, i32 2 %315 = extractelement <4 x float> %311, i32 3 %316 = call float @fabs(float %314) %317 = fdiv float 1.000000e+00, %316 %318 = fmul float %312, %317 %319 = fadd float %318, 1.500000e+00 %320 = fmul float %313, %317 %321 = fadd float %320, 1.500000e+00 %322 = bitcast float %321 to i32 %323 = bitcast float %319 to i32 %324 = bitcast float %315 to i32 %325 = insertelement <4 x i32> undef, i32 %322, i32 0 %326 = insertelement <4 x i32> %325, i32 %323, i32 1 %327 = insertelement <4 x i32> %326, i32 %324, i32 2 %328 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %327, <32 x i8> %59, <16 x i8> %62, i32 4) %329 = extractelement <4 x float> %328, i32 0 %330 = extractelement <4 x float> %328, i32 1 %331 = extractelement <4 x float> %328, i32 2 %332 = fmul float %329, %289 %333 = fmul float %330, %290 %334 = fmul float %331, %291 %335 = bitcast float %102 to i32 %336 = bitcast float %103 to i32 %337 = insertelement <2 x i32> undef, i32 %335, i32 0 %338 = insertelement <2 x i32> %337, i32 %336, i32 1 %339 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %338, <32 x i8> %77, <16 x i8> %80, i32 2) %340 = extractelement <4 x float> %339, i32 0 %341 = extractelement <4 x float> %339, i32 1 %342 = extractelement <4 x float> %339, i32 2 %343 = fmul float %288, %332 %344 = fadd float %343, %340 %345 = fmul float %288, %333 %346 = fadd float %345, %341 %347 = fmul float %288, %334 %348 = fadd float %347, %342 %349 = fmul float %280, %344 %350 = fmul float %281, %346 %351 = fmul float %282, %348 %352 = fmul float %349, %30 %353 = fadd float %352, %27 %354 = fmul float %350, %30 %355 = fadd float %354, %28 %356 = fmul float %351, %30 %357 = fadd float %356, %29 %358 = call float @llvm.maxnum.f32(float %239, float 0.000000e+00) %359 = fadd float %358, 0xBEB0C6F7A0000000 %360 = call float @fabs(float %358) %361 = call float @llvm.pow.f32(float %360, float %43) %362 = call float @llvm.AMDGPU.cndlt(float %359, float 0.000000e+00, float %361) %363 = bitcast float %102 to i32 %364 = bitcast float %103 to i32 %365 = insertelement <2 x i32> undef, i32 %363, i32 0 %366 = insertelement <2 x i32> %365, i32 %364, i32 1 %367 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %366, <32 x i8> %83, <16 x i8> %86, i32 2) %368 = extractelement <4 x float> %367, i32 0 %369 = extractelement <4 x float> %367, i32 1 %370 = extractelement <4 x float> %367, i32 2 %371 = fmul float %368, %34 %372 = fadd float %371, %31 %373 = fmul float %369, %34 %374 = fadd float %373, %32 %375 = fmul float %370, %34 %376 = fadd float %375, %33 %377 = fmul float %186, %162 %378 = fmul float %187, %163 %379 = fadd float %378, %377 %380 = fmul float %188, %164 %381 = fadd float %379, %380 %382 = call float @llvm.AMDIL.clamp.(float %381, float 0.000000e+00, float 1.000000e+00) %383 = fmul float %198, %162 %384 = fmul float %200, %163 %385 = fadd float %384, %383 %386 = fmul float %202, %164 %387 = fadd float %385, %386 %388 = call float @llvm.AMDIL.clamp.(float %387, float 0.000000e+00, float 1.000000e+00) %389 = fadd float %382, 0xBEB0C6F7A0000000 %390 = fadd float %388, 0xBEB0C6F7A0000000 %391 = call float @fabs(float %388) %392 = call float @llvm.pow.f32(float %391, float %39) %393 = fadd float %39, 8.000000e+00 %394 = fmul float %393, %392 %395 = fmul float %394, 0x3FA45F3060000000 %396 = fmul float %353, %382 %397 = fmul float %355, %382 %398 = fmul float %357, %382 %399 = call float @llvm.AMDGPU.cndlt(float %389, float 0.000000e+00, float %396) %400 = call float @llvm.AMDGPU.cndlt(float %389, float 0.000000e+00, float %397) %401 = call float @llvm.AMDGPU.cndlt(float %389, float 0.000000e+00, float %398) %402 = fmul float %372, %395 %403 = fmul float %374, %395 %404 = fmul float %376, %395 %405 = call float @llvm.AMDGPU.cndlt(float %390, float 0.000000e+00, float %402) %406 = call float @llvm.AMDGPU.cndlt(float %390, float 0.000000e+00, float %403) %407 = call float @llvm.AMDGPU.cndlt(float %390, float 0.000000e+00, float %404) %408 = fadd float %399, %405 %409 = fadd float %400, %406 %410 = fadd float %401, %407 %411 = fmul float %362, %408 %412 = fmul float %362, %409 %413 = fmul float %362, %410 %414 = fmul float %temp24.0, %411 %415 = fmul float %temp25.0, %412 %416 = fmul float %temp26.0, %413 %417 = fmul float %414, %40 %418 = fmul float %415, %41 %419 = fmul float %416, %42 %420 = fmul float %238, %417 %421 = fmul float %238, %418 %422 = fmul float %238, %419 %423 = fsub float -0.000000e+00, %244 %424 = call float @llvm.AMDGPU.cndlt(float %423, float %420, float 0.000000e+00) %425 = fsub float -0.000000e+00, %244 %426 = call float @llvm.AMDGPU.cndlt(float %425, float %421, float 0.000000e+00) %427 = fsub float -0.000000e+00, %244 %428 = call float @llvm.AMDGPU.cndlt(float %427, float %422, float 0.000000e+00) %429 = call i32 @llvm.SI.packf16(float %424, float %426) %430 = bitcast i32 %429 to float %431 = call i32 @llvm.SI.packf16(float %428, float 0.000000e+00) %432 = bitcast i32 %431 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %430, float %432, float %430, float %432) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_movk_i32 s32, 0xf00 ; B0200F00 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 v_interp_p1_f32 v9, v0, 0, 0, [m0] ; C8240000 v_interp_p2_f32 v9, [v9], v1, 0, 0, [m0] ; C8250001 v_interp_p1_f32 v10, v0, 1, 0, [m0] ; C8280100 v_interp_p2_f32 v10, [v10], v1, 1, 0, [m0] ; C8290101 v_interp_p1_f32 v11, v0, 2, 0, [m0] ; C82C0200 v_interp_p2_f32 v11, [v11], v1, 2, 0, [m0] ; C82D0201 v_interp_p1_f32 v12, v0, 0, 1, [m0] ; C8300400 v_interp_p2_f32 v12, [v12], v1, 0, 1, [m0] ; C8310401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s33, s[0:3], 0x24 ; C2108124 s_buffer_load_dword s34, s[0:3], 0x25 ; C2110125 s_buffer_load_dword s35, s[0:3], 0x26 ; C2118126 s_buffer_load_dword s36, s[0:3], 0x28 ; C2120128 s_buffer_load_dword s37, s[0:3], 0x29 ; C2128129 v_interp_p1_f32 v15, v0, 1, 1, [m0] ; C83C0500 v_interp_p2_f32 v15, [v15], v1, 1, 1, [m0] ; C83D0501 v_interp_p1_f32 v16, v0, 2, 1, [m0] ; C8400600 v_interp_p2_f32 v16, [v16], v1, 2, 1, [m0] ; C8410601 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 v_interp_p1_f32 v13, v0, 0, 2, [m0] ; C8340800 v_interp_p2_f32 v13, [v13], v1, 0, 2, [m0] ; C8350801 v_interp_p1_f32 v14, v0, 1, 2, [m0] ; C8380900 v_interp_p2_f32 v14, [v14], v1, 1, 2, [m0] ; C8390901 v_interp_p1_f32 v3, v0, 0, 3, [m0] ; C80C0C00 v_interp_p2_f32 v3, [v3], v1, 0, 3, [m0] ; C80D0C01 v_interp_p1_f32 v4, v0, 1, 3, [m0] ; C8100D00 v_interp_p2_f32 v4, [v4], v1, 1, 3, [m0] ; C8110D01 v_interp_p1_f32 v17, v0, 0, 4, [m0] ; C8441000 v_interp_p2_f32 v17, [v17], v1, 0, 4, [m0] ; C8451001 v_interp_p1_f32 v19, v0, 1, 4, [m0] ; C84C1100 v_interp_p2_f32 v19, [v19], v1, 1, 4, [m0] ; C84D1101 v_interp_p1_f32 v2, v0, 2, 4, [m0] ; C8081200 v_interp_p2_f32 v2, [v2], v1, 2, 4, [m0] ; C8091201 v_interp_p1_f32 v18, v0, 0, 5, [m0] ; C8481400 v_interp_p2_f32 v18, [v18], v1, 0, 5, [m0] ; C8491401 v_interp_p1_f32 v20, v0, 1, 5, [m0] ; C8501500 v_interp_p2_f32 v20, [v20], v1, 1, 5, [m0] ; C8511501 v_interp_p1_f32 v21, v0, 2, 5, [m0] ; C8541600 v_interp_p2_f32 v21, [v21], v1, 2, 5, [m0] ; C8551601 v_interp_p1_f32 v8, v0, 0, 6, [m0] ; C8201800 v_interp_p2_f32 v8, [v8], v1, 0, 6, [m0] ; C8211801 v_interp_p1_f32 v7, v0, 1, 6, [m0] ; C81C1900 v_interp_p2_f32 v7, [v7], v1, 1, 6, [m0] ; C81D1901 v_interp_p1_f32 v6, v0, 2, 6, [m0] ; C8181A00 v_interp_p2_f32 v6, [v6], v1, 2, 6, [m0] ; C8191A01 s_load_dwordx4 s[12:15], s[4:5], 0x18 ; C0860518 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx8 s[16:23], s[6:7], 0x30 ; C0C80730 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[8:11] ; F0800700 00461603 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, 2.0, v22, -1.0 ; D282001A 03CE2CF4 v_mad_f32 v27, 2.0, v23, -1.0 ; D282001B 03CE2EF4 v_mad_f32 v28, 2.0, v24, -1.0 ; D282001C 03CE30F4 image_sample v13, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[16:23], s[12:15] ; F0800100 00640D0D s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v13, s33, v13 ; 061A1A21 v_mul_f32_e32 v13, s34, v13 ; 101A1A22 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_mul_f32_e32 v14, v18, v18 ; 101C2512 v_mad_f32 v14, v20, v20, v14 ; D282000E 043A2914 v_mad_f32 v14, v21, v21, v14 ; D282000E 043A2B15 v_rsq_clamp_f32_e64 v22, |v14| ; D3580116 0000010E v_mov_b32_e32 v23, 0xb58637bd ; 7E2E02FF B58637BD v_add_f32_e32 v23, v13, v23 ; 062E2F0D v_cmp_gt_f32_e32 vcc, 0, v23 ; 7C082E80 v_min_f32_e32 v22, 0x7f7fffff, v22 ; 1E2C2CFF 7F7FFFFF s_buffer_load_dword s8, s[0:3], 0x2a ; C204012A v_mul_f32_e32 v18, v18, v22 ; 10242D12 v_mul_f32_e32 v20, v20, v22 ; 10282D14 v_mul_f32_e32 v21, v21, v22 ; 102A2D15 s_buffer_load_dword s9, s[0:3], 0x2c ; C204812C s_buffer_load_dword s10, s[0:3], 0x2d ; C205012D v_and_b32_e32 v13, 0x7fffffff, v13 ; 361A1AFF 7FFFFFFF v_log_f32_e32 v13, v13 ; 7E1A4F0D v_mul_f32_e32 v18, s36, v18 ; 10242424 v_mad_f32 v18, -s37, v20, -v18 ; D2820012 A44A2825 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v18, -s8, v21, v18 ; D2820012 244A2A08 v_mul_legacy_f32_e32 v13, s35, v13 ; 0E1A1A23 v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_cndmask_b32_e64 v13, v13, 0, vcc ; D200000D 01A9010D v_subrev_f32_e32 v18, s9, v18 ; 0A242409 v_mul_f32_e32 v18, s10, v18 ; 1024240A s_buffer_load_dword s25, s[0:3], s32 ; C20C8020 v_mul_f32_e32 v20, v12, v12 ; 1028190C v_mad_f32 v20, v15, v15, v20 ; D2820014 04521F0F v_mad_f32 v20, v16, v16, v20 ; D2820014 04522110 v_rsq_clamp_f32_e32 v29, v20 ; 7E3A5914 v_mul_f32_e32 v20, v9, v9 ; 10281309 v_mad_f32 v20, v10, v10, v20 ; D2820014 0452150A v_mad_f32 v20, v11, v11, v20 ; D2820014 0452170B v_rsq_clamp_f32_e32 v30, v20 ; 7E3C5914 v_mul_f32_e32 v20, v8, v8 ; 10281108 v_mad_f32 v20, v7, v7, v20 ; D2820014 04520F07 v_mad_f32 v20, v6, v6, v20 ; D2820014 04520D06 v_rsq_clamp_f32_e32 v31, v20 ; 7E3E5914 v_mul_f32_e32 v20, v17, v17 ; 10282311 v_mad_f32 v20, v19, v19, v20 ; D2820014 04522713 v_mad_f32 v20, v2, v2, v20 ; D2820014 04520502 v_rsq_clamp_f32_e32 v32, v20 ; 7E405914 v_mul_f32_e32 v20, v26, v26 ; 1028351A v_mad_f32 v20, v27, v27, v20 ; D2820014 0452371B v_mad_f32 v20, v28, v28, v20 ; D2820014 0452391C v_rsq_clamp_f32_e32 v33, v20 ; 7E425914 v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_sub_f32_e32 v20, 1.0, v14 ; 08281CF2 s_buffer_load_dword s17, s[0:3], 0x0 ; C2088100 s_buffer_load_dword s16, s[0:3], 0x1 ; C2080101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s26, s[0:3], 0x13 ; C20D0113 s_buffer_load_dword s21, s[0:3], 0x14 ; C20A8114 s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115 s_buffer_load_dword s19, s[0:3], 0x16 ; C2098116 s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117 s_buffer_load_dword s22, s[0:3], 0x18 ; C20B0118 s_buffer_load_dword s23, s[0:3], 0x19 ; C20B8119 s_buffer_load_dword s24, s[0:3], 0x1a ; C20C011A s_buffer_load_dword s28, s[0:3], 0x1b ; C20E011B s_buffer_load_dword s11, s[0:3], 0x1c ; C205811C s_buffer_load_dword s10, s[0:3], 0x20 ; C2050120 s_buffer_load_dword s9, s[0:3], 0x21 ; C2048121 s_buffer_load_dword s8, s[0:3], 0x22 ; C2040122 s_buffer_load_dword s18, s[0:3], 0x23 ; C2090123 v_mul_f32_e32 v14, v20, v13 ; 101C1B14 v_mul_f32_e32 v18, v18, v18 ; 10242512 v_mul_f32_e32 v14, v14, v18 ; 101C250E v_cmp_gt_f32_e32 vcc, 0, v14 ; 7C081C80 v_cndmask_b32_e64 v21, 0, -1.0, vcc ; D2000015 01A9E680 v_cndmask_b32_e64 v21, v21, -1.0, vcc ; D2000015 01A9E715 v_cndmask_b32_e64 v21, v21, -1.0, vcc ; D2000015 01A9E715 v_cndmask_b32_e64 v21, v21, -1.0, vcc ; D2000015 01A9E715 v_cmpx_le_f32_e32 vcc, 0, v21 ; 7C262A80 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[30:31], 0, s25 ; D10A001E 00003280 v_mov_b32_e32 v23, s26 ; 7E2E021A v_mov_b32_e32 v24, s27 ; 7E30021B v_mov_b32_e32 v25, s28 ; 7E32021C v_mov_b32_e32 v21, v13 ; 7E2A030D v_mov_b32_e32 v22, v13 ; 7E2C030D s_and_saveexec_b64 s[26:27], s[30:31] ; BE9A241E s_xor_b64 s[26:27], exec, s[26:27] ; 899A1A7E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 v_interp_p1_f32 v21, v0, 3, 7, [m0] ; C8541F00 s_buffer_load_dword s25, s[0:3], 0x6 ; C20C8106 s_buffer_load_dword s40, s[0:3], 0x7 ; C2140107 s_buffer_load_dword s41, s[0:3], 0x4 ; C2148104 s_buffer_load_dword s42, s[0:3], 0x5 ; C2150105 v_interp_p2_f32 v21, [v21], v1, 3, 7, [m0] ; C8551F01 v_interp_p1_f32 v22, v0, 1, 7, [m0] ; C8581D00 v_rcp_f32_e32 v21, v21 ; 7E2A5515 v_interp_p2_f32 v22, [v22], v1, 1, 7, [m0] ; C8591D01 v_interp_p1_f32 v0, v0, 0, 7, [m0] ; C8001C00 v_interp_p2_f32 v0, [v0], v1, 0, 7, [m0] ; C8011C01 v_mul_f32_e32 v0, v0, v21 ; 10002B00 v_mul_f32_e32 v1, v22, v21 ; 10022B16 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v21, s40 ; 7E2A0228 v_mad_f32 v21, s41, v0, v21 ; D2820015 04560029 v_mov_b32_e32 v0, s25 ; 7E000219 v_mad_f32 v22, s42, v1, v0 ; D2820016 0402022A image_sample v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[28:35], s[36:39] ; F0800700 01272215 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v34, v13 ; 102A1B22 v_mul_f32_e32 v22, v35, v13 ; 102C1B23 v_mul_f32_e32 v13, v36, v13 ; 101A1B24 s_or_b64 exec, exec, s[26:27] ; 88FE1A7E v_min_f32_e32 v0, 0x7f7fffff, v29 ; 1E003AFF 7F7FFFFF v_mul_f32_e32 v1, v0, v12 ; 10021900 v_mul_f32_e32 v12, v0, v15 ; 10181F00 v_mul_f32_e32 v0, v0, v16 ; 10002100 v_min_f32_e32 v15, 0x7f7fffff, v30 ; 1E1E3CFF 7F7FFFFF v_mul_f32_e32 v9, v15, v9 ; 1012130F v_mul_f32_e32 v10, v15, v10 ; 1014150F v_mul_f32_e32 v11, v15, v11 ; 1016170F v_min_f32_e32 v15, 0x7f7fffff, v31 ; 1E1E3EFF 7F7FFFFF v_min_f32_e32 v29, 0x7f7fffff, v32 ; 1E3A40FF 7F7FFFFF v_mul_f32_e32 v30, v29, v17 ; 103C231D v_mul_f32_e32 v19, v29, v19 ; 1026271D v_min_f32_e32 v16, 0x7f7fffff, v33 ; 1E2042FF 7F7FFFFF v_mul_f32_e32 v26, v16, v26 ; 10343510 v_mul_f32_e32 v27, v16, v27 ; 10363710 v_mul_f32_e32 v28, v16, v28 ; 10383910 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 v_mul_f32_e32 v16, v15, v8 ; 1020110F v_mul_f32_e32 v16, v16, v26 ; 10203510 v_mul_f32_e32 v17, v15, v7 ; 10220F0F v_mad_f32 v16, v27, v17, v16 ; D2820010 0442231B v_mul_f32_e32 v17, v15, v6 ; 10220D0F v_mad_f32 v16, v28, v17, v16 ; D2820010 0442231C v_mul_f32_e32 v17, v26, v16 ; 1022211A v_mad_f32 v17, v16, v26, v17 ; D2820011 04463510 v_mad_f32 v8, -v8, v15, v17 ; D2820008 24461F08 v_mul_f32_e32 v17, v27, v16 ; 1022211B v_mad_f32 v17, v16, v27, v17 ; D2820011 04463710 v_mad_f32 v7, -v7, v15, v17 ; D2820007 24461F07 v_mul_f32_e32 v17, v28, v16 ; 1022211C v_mad_f32 v16, v16, v28, v17 ; D2820010 04463910 v_mad_f32 v6, -v6, v15, v16 ; D2820006 24421F06 v_mul_f32_e32 v15, v8, v9 ; 101E1308 v_mad_f32 v15, v10, v7, v15 ; D282000F 043E0F0A v_mad_f32 v15, v11, v6, v15 ; D282000F 043E0D0B v_mul_f32_e32 v16, v10, v0 ; 1020010A v_mad_f32 v16, v12, v11, -v16 ; D2820010 8442170C v_mul_f32_e32 v11, v11, v1 ; 1016030B v_mad_f32 v11, v0, v9, -v11 ; D282000B 842E1300 v_mul_f32_e32 v9, v9, v12 ; 10121909 v_mad_f32 v9, v1, v10, -v9 ; D2820009 84261501 v_mul_f32_e32 v10, v5, v16 ; 10142105 v_mul_f32_e32 v11, v5, v11 ; 10161705 v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mul_f32_e32 v9, v8, v10 ; 10121508 v_mad_f32 v9, v11, v7, v9 ; D2820009 04260F0B v_mad_f32 v16, v5, v6, v9 ; D2820010 04260D05 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mad_f32 v1, v12, v7, v1 ; D2820001 04060F0C v_mad_f32 v17, v0, v6, v1 ; D2820011 04060D00 s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 v_cubeid_f32 v34, v15, v16, v17 ; D2880022 0446210F v_cubema_f32 v33, v15, v16, v17 ; D28E0021 0446210F v_cubesc_f32 v32, v15, v16, v17 ; D28A0020 0446210F v_cubetc_f32 v31, v15, v16, v17 ; D28C001F 0446210F s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_rcp_f32_e64 v0, |v33| ; D3540100 00000121 s_load_dwordx4 s[48:51], s[4:5], 0x10 ; C0980510 s_load_dwordx4 s[52:55], s[4:5], 0x14 ; C09A0514 s_load_dwordx8 s[56:63], s[6:7], 0x20 ; C0DC0720 s_load_dwordx8 s[64:71], s[6:7], 0x28 ; C0E00728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[32:39], s[28:31] ; F0800100 00E80103 v_mov_b32_e32 v5, 0x3fc00000 ; 7E0A02FF 3FC00000 v_mad_f32 v33, v31, v0, v5 ; D2820021 0416011F v_mad_f32 v32, v32, v0, v5 ; D2820020 04160120 image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[40:47], s[0:3] ; F0800700 000A0920 v_mul_f32_e32 v0, s22, v25 ; 10003216 v_mul_f32_e32 v5, s23, v25 ; 100A3217 v_mul_f32_e32 v12, s24, v25 ; 10183218 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mul_f32_e32 v5, v5, v10 ; 100A1505 v_mul_f32_e32 v9, v12, v11 ; 1012170C image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[56:63], s[48:51] ; F0800700 018E0A03 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v1, v0, v10 ; D2820000 042A0101 v_mad_f32 v5, v1, v5, v11 ; D2820005 042E0B01 v_mad_f32 v1, v1, v9, v12 ; D2820001 04321301 image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[64:71], s[52:55] ; F0800700 01B00903 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v24, v9, s21 ; D2820003 00561318 v_mad_f32 v4, v10, v24, s20 ; D2820004 0052310A v_mad_f32 v9, v11, v24, s19 ; D2820009 004E310B v_mul_f32_e32 v10, v30, v26 ; 1014351E v_mad_f32 v10, v27, v19, v10 ; D282000A 042A271B v_mul_f32_e32 v8, v30, v8 ; 1010111E v_mad_f32 v7, v7, v19, v8 ; D2820007 04222707 v_sub_f32_e64 v8, 1.0, s17 ; D2080008 000022F2 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_sub_f32_e64 v8, 1.0, s16 ; D2080008 000020F2 v_mul_f32_e32 v5, v5, v8 ; 100A1105 v_sub_f32_e64 v8, 1.0, s15 ; D2080008 00001EF2 v_mul_f32_e32 v1, v1, v8 ; 10021101 v_mad_f32 v0, v23, v0, s12 ; D2820000 00320117 v_mad_f32 v5, v5, v23, s13 ; D2820005 00362F05 v_mad_f32 v1, v1, v23, s14 ; D2820001 003A2F01 v_mul_f32_e32 v2, v29, v2 ; 1004051D v_max_f32_e32 v8, 0, v20 ; 20102880 v_mov_b32_e32 v11, 0x7fffffff ; 7E1602FF 7FFFFFFF v_and_b32_e32 v12, v8, v11 ; 36181708 v_log_f32_e32 v12, v12 ; 7E184F0C v_mov_b32_e32 v15, 0xb58637bd ; 7E1E02FF B58637BD v_add_f32_e32 v8, v15, v8 ; 0610110F v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_mul_legacy_f32_e32 v8, s18, v12 ; 0E101812 v_exp_f32_e32 v8, v8 ; 7E104B08 v_cndmask_b32_e64 v8, v8, 0, vcc ; D2000008 01A90108 v_mad_f32 v10, v28, v2, v10 ; D282000A 042A051C v_mad_f32 v2, v6, v2, v7 ; D2820002 041E0506 v_add_f32_e64 v6, 0, v10 clamp ; D2060806 00021480 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_add_f32_e32 v6, v15, v6 ; 060C0D0F v_and_b32_e32 v7, v2, v11 ; 360E1702 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_cndmask_b32_e64 v5, v5, 0, vcc ; D2000005 01A90105 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_add_f32_e32 v2, v15, v2 ; 0604050F v_mul_legacy_f32_e32 v6, s11, v7 ; 0E0C0E0B v_mov_b32_e32 v7, 0x41000000 ; 7E0E02FF 41000000 v_add_f32_e32 v7, s11, v7 ; 060E0E0B v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_mul_f32_e32 v6, 0x3d22f983, v6 ; 100C0CFF 3D22F983 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v6, v6, v9 ; 100C1306 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, v3, 0, vcc ; D2000002 01A90103 v_cndmask_b32_e64 v3, v4, 0, vcc ; D2000003 01A90104 v_cndmask_b32_e64 v4, v6, 0, vcc ; D2000004 01A90106 v_add_f32_e32 v0, v2, v0 ; 06000102 v_add_f32_e32 v2, v3, v5 ; 06040B03 v_add_f32_e32 v1, v4, v1 ; 06020304 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_mul_f32_e32 v2, v2, v8 ; 10041102 v_mul_f32_e32 v1, v1, v8 ; 10021101 v_mul_f32_e32 v0, v0, v21 ; 10002B00 v_mul_f32_e32 v2, v2, v22 ; 10042D02 v_mul_f32_e32 v1, v1, v13 ; 10021B01 v_mul_f32_e32 v0, s10, v0 ; 1000000A v_mul_f32_e32 v2, s9, v2 ; 10040409 v_mul_f32_e32 v1, s8, v1 ; 10020208 v_mul_f32_e32 v0, v0, v18 ; 10002500 v_mul_f32_e32 v2, v2, v18 ; 10042502 v_mul_f32_e32 v1, v1, v18 ; 10022501 v_xor_b32_e32 v3, 0x80000000, v14 ; 3A061CFF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v2, 0, v2, vcc ; D2000002 01AA0480 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 40 Code Size: 1784 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL OUT[0], COLOR DCL CONST[0..6] DCL TEMP[0..1] DCL TEMP[2], LOCAL IMM[0] FLT32 { 0.2209, 0.1138, 0.0102, 0.0000} IMM[1] FLT32 { 2.0000, 127.0000, 0.0000, -340282346638528859811704183484516925440.0000} IMM[2] FLT32 { 0.3390, 0.6780, 0.1130, 255.0000} IMM[3] FLT32 { 0.4184, 0.7319, 0.2969, 0.0039} 0: MOV TEMP[0].y, CONST[6].yyyy 1: ADD TEMP[0].xyz, TEMP[0].yyyy, CONST[0] 2: DP3 TEMP[1].x, TEMP[0], IMM[0] 3: DP3 TEMP[1].y, TEMP[0], IMM[2] 4: DP3 TEMP[1].z, TEMP[0], IMM[3] 5: MAX TEMP[0].xyz, TEMP[1], IMM[0].wwww 6: RCP TEMP[0].z, TEMP[0].zzzz 7: MUL OUT[0].xy, TEMP[0].zzzz, TEMP[0] 8: LG2 TEMP[2].x, |TEMP[0].yyyy| 9: MAX TEMP[0].x, IMM[1].wwww, TEMP[2].xxxx 10: MAD TEMP[0].y, TEMP[0].xxxx, IMM[1].xxxx, IMM[1].yyyy 11: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx 12: FRC TEMP[0].x, TEMP[0].xxxx 13: MUL TEMP[0].z, TEMP[0].xxxx, IMM[2].wwww 14: MOV OUT[0].w, TEMP[0].xxxx 15: FRC TEMP[0].x, TEMP[0].zzzz 16: ADD TEMP[0].x, -TEMP[0].xxxx, TEMP[0].zzzz 17: MAD TEMP[0].x, TEMP[0].xxxx, -IMM[3].wwww, TEMP[0].yyyy 18: MUL OUT[0].z, TEMP[0].xxxx, IMM[3].wwww 19: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %28 = fadd float %27, %24 %29 = fadd float %27, %25 %30 = fadd float %27, %26 %31 = fmul float %28, 0x3FCC467380000000 %32 = fmul float %29, 0x3FBD21FF20000000 %33 = fadd float %32, %31 %34 = fmul float %30, 0x3F84E3BCE0000000 %35 = fadd float %33, %34 %36 = fmul float %28, 0x3FD5B22D00000000 %37 = fmul float %29, 0x3FE5B22D00000000 %38 = fadd float %37, %36 %39 = fmul float %30, 0x3FBCED9160000000 %40 = fadd float %38, %39 %41 = fmul float %28, 0x3FDAC710C0000000 %42 = fmul float %29, 0x3FE76BB980000000 %43 = fadd float %42, %41 %44 = fmul float %30, 0x3FD30068E0000000 %45 = fadd float %43, %44 %46 = call float @llvm.maxnum.f32(float %35, float 0x3EB0C6F7A0000000) %47 = call float @llvm.maxnum.f32(float %40, float 0x3EB0C6F7A0000000) %48 = call float @llvm.maxnum.f32(float %45, float 0x3EB0C6F7A0000000) %49 = fdiv float 1.000000e+00, %48 %50 = fmul float %49, %46 %51 = fmul float %49, %47 %52 = call float @fabs(float %47) %53 = call float @llvm.log2.f32(float %52) %54 = call float @llvm.maxnum.f32(float %53, float 0xC7EFFFFFE0000000) %55 = fmul float %54, 2.000000e+00 %56 = fadd float %55, 1.270000e+02 %57 = fadd float %54, %54 %58 = call float @llvm.AMDIL.fraction.(float %57) %59 = fmul float %58, 2.550000e+02 %60 = call float @llvm.AMDIL.fraction.(float %59) %61 = fsub float %59, %60 %62 = fmul float %61, 0xBF70101020000000 %63 = fadd float %62, %56 %64 = fmul float %63, 0x3F70101020000000 %65 = call i32 @llvm.SI.packf16(float %50, float %51) %66 = bitcast i32 %65 to float %67 = call i32 @llvm.SI.packf16(float %64, float %58) %68 = bitcast i32 %67 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %66, float %68, float %66, float %68) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 v_mov_b32_e32 v0, 0xff7fffff ; 7E0002FF FF7FFFFF s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s5 ; 7E020205 v_add_f32_e32 v1, s4, v1 ; 06020204 v_mov_b32_e32 v2, s6 ; 7E040206 v_add_f32_e32 v2, s4, v2 ; 06040404 v_mul_f32_e32 v3, 0x3e62339c, v1 ; 100602FF 3E62339C v_mul_f32_e32 v4, 0x3ead9168, v1 ; 100802FF 3EAD9168 v_mul_f32_e32 v1, 0x3ed63886, v1 ; 100202FF 3ED63886 v_madmk_f32_e32 v3, v2, v3, 0x3de90ff9 ; 40060702 3DE90FF9 v_madmk_f32_e32 v4, v2, v4, 0x3f2d9168 ; 40080902 3F2D9168 v_madmk_f32_e32 v1, v2, v1, 0x3f3b5dcc ; 40020302 3F3B5DCC v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v5, s0 ; 7E0A0200 v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_madmk_f32_e32 v3, v5, v3, 0x3c271de7 ; 40060705 3C271DE7 v_madmk_f32_e32 v4, v5, v4, 0x3de76c8b ; 40080905 3DE76C8B v_madmk_f32_e32 v1, v5, v1, 0x3e980347 ; 40020305 3E980347 v_max_f32_e32 v1, 0x358637bd, v1 ; 200202FF 358637BD v_rcp_f32_e32 v1, v1 ; 7E025501 v_max_f32_e32 v4, 0x358637bd, v4 ; 200808FF 358637BD v_log_f32_e64 v5, |v4| ; D34E0105 00000104 v_max_f32_e32 v3, 0x358637bd, v3 ; 200606FF 358637BD v_mul_f32_e32 v3, v3, v1 ; 10060303 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_max_f32_e32 v0, v5, v0 ; 20000105 v_madak_f32_e32 v4, 2.0, v0, 0x42fe0000 ; 420800F4 42FE0000 v_add_f32_e32 v5, v0, v0 ; 060A0100 v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v0, 2.0, v0, -v5 ; D2820000 841600F4 v_mul_f32_e32 v5, 0x437f0000, v0 ; 100A00FF 437F0000 v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v5, v0, v2, -v5 ; D2820005 84160500 v_mad_f32 v2, v0, v2, -v5 ; D2820002 84160500 v_madmk_f32_e32 v2, v2, v4, 0xbb808081 ; 40040902 BB808081 v_mul_f32_e32 v2, 0x3b808081, v2 ; 100404FF 3B808081 v_cvt_pkrtz_f16_f32_e32 v1, v3, v1 ; 5E020303 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 8 Code Size: 276 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], TEXCOORD[0] DCL CONST[0..15] DCL TEMP[0..6] IMM[0] FLT32 { 0.5000, 6.2832, -3.1416, 0.1592} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[7], IN[0].yyyy 1: MAD TEMP[0], CONST[6], IN[0].xxxx, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[9], IN[0].wwww, TEMP[0] 4: ADD TEMP[1].xyz, TEMP[0], -CONST[5] 5: MOV TEMP[2].yw, CONST[14] 6: MAD TEMP[1].w, CONST[13].zzzz, TEMP[1].yyyy, TEMP[2].yyyy 7: ADD TEMP[1].w, TEMP[1].wwww, IMM[0].xxxx 8: FRC TEMP[1].w, TEMP[1].wwww 9: MAD TEMP[1].w, TEMP[1].wwww, IMM[0].yyyy, IMM[0].zzzz 10: SCS TEMP[3].y, TEMP[1].wwww 11: MUL TEMP[1].w, TEMP[3].yyyy, CONST[14].zzzz 12: MAD TEMP[2].x, TEMP[2].wwww, TEMP[1].yyyy, CONST[15].yyyy 13: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 14: FRC TEMP[2].x, TEMP[2].xxxx 15: MAD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy, IMM[0].zzzz 16: SCS TEMP[3].y, TEMP[2].xxxx 17: MUL TEMP[2].x, TEMP[3].yyyy, CONST[15].zzzz 18: MUL TEMP[1].w, TEMP[1].wwww, TEMP[2].xxxx 19: MAD TEMP[1].w, TEMP[1].wwww, IMM[0].wwww, IMM[0].xxxx 20: FRC TEMP[1].w, TEMP[1].wwww 21: MAD TEMP[1].w, TEMP[1].wwww, IMM[0].yyyy, IMM[0].zzzz 22: SCS TEMP[2].xy, TEMP[1].wwww 23: MOV TEMP[3].xyz, CONST[10] 24: ADD TEMP[3].xyz, TEMP[3], CONST[11] 25: ADD TEMP[4].xyz, TEMP[1], -TEMP[3] 26: MOV TEMP[5].yz, CONST[12].xxyw 27: MOV TEMP[5].x, CONST[15].wwww 28: DP3 TEMP[1].w, TEMP[5].yzxw, TEMP[4] 29: MAD TEMP[3].xyz, TEMP[5].yzxw, TEMP[1].wwww, TEMP[3] 30: ADD TEMP[4].xyz, TEMP[1], -TEMP[3] 31: MUL TEMP[6].xyz, TEMP[4].yzxw, TEMP[5] 32: MAD TEMP[5].xyz, TEMP[5].zxyw, TEMP[4].zxyw, -TEMP[6] 33: MUL TEMP[2].yzw, TEMP[2].yyyy, TEMP[5].xxyz 34: MAD TEMP[2].xyz, TEMP[4], TEMP[2].xxxx, TEMP[2].yzww 35: ADD TEMP[2].xyz, TEMP[2], TEMP[3] 36: ADD TEMP[1].xyz, -TEMP[1], TEMP[2] 37: ADD TEMP[0].xyz, TEMP[0], TEMP[1] 38: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 39: MAD TEMP[1], CONST[0], TEMP[0].xxxx, TEMP[1] 40: MAD TEMP[1], CONST[2], TEMP[0].zzzz, TEMP[1] 41: MAD OUT[0], CONST[3], TEMP[0].wwww, TEMP[1] 42: MOV OUT[1], IN[1] 43: MUL OUT[2], IMM[1].xxyy, IN[2].xyxx 44: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %63 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = add i32 %5, %7 %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %65) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = extractelement <4 x float> %74, i32 2 %78 = extractelement <4 x float> %74, i32 3 %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = add i32 %5, %7 %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %81) %83 = extractelement <4 x float> %82, i32 0 %84 = extractelement <4 x float> %82, i32 1 %85 = fmul float %36, %68 %86 = fmul float %37, %68 %87 = fmul float %38, %68 %88 = fmul float %39, %68 %89 = fmul float %32, %67 %90 = fadd float %89, %85 %91 = fmul float %33, %67 %92 = fadd float %91, %86 %93 = fmul float %34, %67 %94 = fadd float %93, %87 %95 = fmul float %35, %67 %96 = fadd float %95, %88 %97 = fmul float %40, %69 %98 = fadd float %97, %90 %99 = fmul float %41, %69 %100 = fadd float %99, %92 %101 = fmul float %42, %69 %102 = fadd float %101, %94 %103 = fmul float %43, %69 %104 = fadd float %103, %96 %105 = fmul float %44, %70 %106 = fadd float %105, %98 %107 = fmul float %45, %70 %108 = fadd float %107, %100 %109 = fmul float %46, %70 %110 = fadd float %109, %102 %111 = fmul float %47, %70 %112 = fadd float %111, %104 %113 = fsub float %106, %29 %114 = fsub float %108, %30 %115 = fsub float %110, %31 %116 = fmul float %56, %114 %117 = fadd float %116, %57 %118 = fadd float %117, 5.000000e-01 %119 = call float @llvm.AMDIL.fraction.(float %118) %120 = fmul float %119, 0x401921FB60000000 %121 = fadd float %120, 0xC00921FB60000000 %122 = call float @llvm.sin.f32(float %121) %123 = fmul float %122, %58 %124 = fmul float %59, %114 %125 = fadd float %124, %60 %126 = fadd float %125, 5.000000e-01 %127 = call float @llvm.AMDIL.fraction.(float %126) %128 = fmul float %127, 0x401921FB60000000 %129 = fadd float %128, 0xC00921FB60000000 %130 = call float @llvm.sin.f32(float %129) %131 = fmul float %130, %61 %132 = fmul float %123, %131 %133 = fmul float %132, 0x3FC45F3060000000 %134 = fadd float %133, 5.000000e-01 %135 = call float @llvm.AMDIL.fraction.(float %134) %136 = fmul float %135, 0x401921FB60000000 %137 = fadd float %136, 0xC00921FB60000000 %138 = call float @llvm.cos.f32(float %137) %139 = call float @llvm.sin.f32(float %137) %140 = fadd float %48, %51 %141 = fadd float %49, %52 %142 = fadd float %50, %53 %143 = fsub float %113, %140 %144 = fsub float %114, %141 %145 = fsub float %115, %142 %146 = fmul float %54, %143 %147 = fmul float %55, %144 %148 = fadd float %147, %146 %149 = fmul float %62, %145 %150 = fadd float %148, %149 %151 = fmul float %54, %150 %152 = fadd float %151, %140 %153 = fmul float %55, %150 %154 = fadd float %153, %141 %155 = fmul float %62, %150 %156 = fadd float %155, %142 %157 = fsub float %113, %152 %158 = fsub float %114, %154 %159 = fsub float %115, %156 %160 = fmul float %158, %62 %161 = fmul float %159, %54 %162 = fmul float %157, %55 %163 = fmul float %55, %159 %164 = fsub float %163, %160 %165 = fmul float %62, %157 %166 = fsub float %165, %161 %167 = fmul float %54, %158 %168 = fsub float %167, %162 %169 = fmul float %139, %164 %170 = fmul float %139, %166 %171 = fmul float %139, %168 %172 = fmul float %157, %138 %173 = fadd float %172, %169 %174 = fmul float %158, %138 %175 = fadd float %174, %170 %176 = fmul float %159, %138 %177 = fadd float %176, %171 %178 = fadd float %173, %152 %179 = fadd float %175, %154 %180 = fadd float %177, %156 %181 = fsub float %178, %113 %182 = fsub float %179, %114 %183 = fsub float %180, %115 %184 = fadd float %106, %181 %185 = fadd float %108, %182 %186 = fadd float %110, %183 %187 = fmul float %185, %17 %188 = fmul float %185, %18 %189 = fmul float %185, %19 %190 = fmul float %185, %20 %191 = fmul float %13, %184 %192 = fadd float %191, %187 %193 = fmul float %14, %184 %194 = fadd float %193, %188 %195 = fmul float %15, %184 %196 = fadd float %195, %189 %197 = fmul float %16, %184 %198 = fadd float %197, %190 %199 = fmul float %21, %186 %200 = fadd float %199, %192 %201 = fmul float %22, %186 %202 = fadd float %201, %194 %203 = fmul float %23, %186 %204 = fadd float %203, %196 %205 = fmul float %24, %186 %206 = fadd float %205, %198 %207 = fmul float %25, %112 %208 = fadd float %207, %200 %209 = fmul float %26, %112 %210 = fadd float %209, %202 %211 = fmul float %27, %112 %212 = fadd float %211, %204 %213 = fmul float %28, %112 %214 = fadd float %213, %206 %215 = fmul float %83, 0.000000e+00 %216 = fmul float %83, 0.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %75, float %76, float %77, float %78) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %83, float %84, float %215, float %216) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %208, float %210, float %212, float %214) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s5, s[0:3], 0x1b ; C202811B s_buffer_load_dword s6, s[0:3], 0x1c ; C203011C s_buffer_load_dword s7, s[0:3], 0x1d ; C203811D s_buffer_load_dword s9, s[0:3], 0x1e ; C204811E s_buffer_load_dword s10, s[0:3], 0x1f ; C205011F s_buffer_load_dword s11, s[0:3], 0x20 ; C2058120 s_buffer_load_dword s12, s[0:3], 0x21 ; C2060121 s_buffer_load_dword s13, s[0:3], 0x22 ; C2068122 s_buffer_load_dword s14, s[0:3], 0x23 ; C2070123 s_buffer_load_dword s15, s[0:3], 0x24 ; C2078124 s_buffer_load_dword s16, s[0:3], 0x25 ; C2080125 s_buffer_load_dword s17, s[0:3], 0x26 ; C2088126 s_buffer_load_dword s18, s[0:3], 0x27 ; C2090127 s_buffer_load_dword s19, s[0:3], 0x18 ; C2098118 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s6, v2 ; 10000406 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 v_mad_f32 v0, s19, v1, v0 ; D2820000 04020213 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, s7, v2 ; 100A0407 v_mad_f32 v5, s8, v1, v5 ; D2820005 04160208 v_mul_f32_e32 v6, s9, v2 ; 100C0409 v_mad_f32 v6, s4, v1, v6 ; D2820006 041A0204 v_mul_f32_e32 v2, s10, v2 ; 1004040A v_mad_f32 v1, s5, v1, v2 ; D2820001 040A0205 v_mad_f32 v0, s11, v3, v0 ; D2820000 0402060B v_mad_f32 v2, s12, v3, v5 ; D2820002 0416060C v_mad_f32 v5, s13, v3, v6 ; D2820005 041A060D v_mad_f32 v1, s14, v3, v1 ; D2820001 0406060E v_mad_f32 v0, s15, v4, v0 ; D2820000 0402080F v_mad_f32 v2, s16, v4, v2 ; D2820002 040A0810 v_mad_f32 v3, s17, v4, v5 ; D2820003 04160811 v_mad_f32 v1, s18, v4, v1 ; D2820001 04060812 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_buffer_load_dword s6, s[0:3], 0x15 ; C2030115 s_buffer_load_dword s7, s[0:3], 0x16 ; C2038116 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_buffer_load_dword s12, s[0:3], 0x4 ; C2060104 s_buffer_load_dword s13, s[0:3], 0x5 ; C2068105 s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106 s_buffer_load_dword s15, s[0:3], 0x7 ; C2078107 s_buffer_load_dword s16, s[0:3], 0x8 ; C2080108 s_buffer_load_dword s17, s[0:3], 0x9 ; C2088109 s_buffer_load_dword s18, s[0:3], 0x2e ; C209012E s_buffer_load_dword s19, s[0:3], 0x30 ; C2098130 s_buffer_load_dword s20, s[0:3], 0x31 ; C20A0131 s_buffer_load_dword s21, s[0:3], 0x36 ; C20A8136 s_buffer_load_dword s22, s[0:3], 0x39 ; C20B0139 s_buffer_load_dword s23, s[0:3], 0x3a ; C20B813A s_buffer_load_dword s24, s[0:3], 0x3b ; C20C013B s_buffer_load_dword s25, s[0:3], 0x3d ; C20C813D s_buffer_load_dword s26, s[0:3], 0xa ; C20D010A s_buffer_load_dword s27, s[0:3], 0xb ; C20D810B s_buffer_load_dword s28, s[0:3], 0xc ; C20E010C s_buffer_load_dword s29, s[0:3], 0xd ; C20E810D s_buffer_load_dword s30, s[0:3], 0xe ; C20F010E s_buffer_load_dword s31, s[0:3], 0x28 ; C20F8128 s_buffer_load_dword s32, s[0:3], 0x29 ; C2100129 s_buffer_load_dword s33, s[0:3], 0x2a ; C210812A s_buffer_load_dword s34, s[0:3], 0x2c ; C211012C s_buffer_load_dword s35, s[0:3], 0x2d ; C211812D s_buffer_load_dword s36, s[0:3], 0x3e ; C212013E s_buffer_load_dword s0, s[0:3], 0x3f ; C200013F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s22 ; 7E080216 v_subrev_f32_e32 v5, s6, v2 ; 0A0A0406 v_mad_f32 v4, v5, s21, v4 ; D2820004 04102B05 v_mov_b32_e32 v6, s25 ; 7E0C0219 v_mad_f32 v6, v5, s24, v6 ; D2820006 04183105 v_add_f32_e32 v4, 0.5, v4 ; 060808F0 v_floor_f32_e32 v7, v4 ; 7E0E4904 v_subrev_f32_e32 v4, v7, v4 ; 0A080907 v_mov_b32_e32 v7, 0xc0490fdb ; 7E0E02FF C0490FDB v_mov_b32_e32 v8, 0x40c90fdb ; 7E1002FF 40C90FDB v_mad_f32 v4, v4, v8, v7 ; D2820004 041E1104 v_mul_f32_e32 v4, 0x3e22f983, v4 ; 100808FF 3E22F983 v_fract_f32_e32 v4, v4 ; 7E084104 v_sin_f32_e32 v4, v4 ; 7E086B04 v_mul_f32_e32 v4, s23, v4 ; 10080817 v_add_f32_e32 v6, 0.5, v6 ; 060C0CF0 v_floor_f32_e32 v11, v6 ; 7E164906 v_subrev_f32_e32 v6, v11, v6 ; 0A0C0D0B v_mad_f32 v6, v6, v8, v7 ; D2820006 041E1106 v_mul_f32_e32 v6, 0x3e22f983, v6 ; 100C0CFF 3E22F983 v_fract_f32_e32 v6, v6 ; 7E0C4106 v_sin_f32_e32 v6, v6 ; 7E0C6B06 v_mul_f32_e32 v6, s36, v6 ; 100C0C24 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mov_b32_e32 v6, 0x3e22f983 ; 7E0C02FF 3E22F983 v_mad_f32 v4, v4, v6, 0.5 ; D2820004 03C20D04 v_floor_f32_e32 v6, v4 ; 7E0C4904 v_subrev_f32_e32 v4, v6, v4 ; 0A080906 v_mad_f32 v4, v4, v8, v7 ; D2820004 041E1104 v_mov_b32_e32 v6, s34 ; 7E0C0222 v_add_f32_e32 v6, s31, v6 ; 060C0C1F v_mov_b32_e32 v7, s35 ; 7E0E0223 v_add_f32_e32 v7, s32, v7 ; 060E0E20 v_mov_b32_e32 v8, s18 ; 7E100212 v_add_f32_e32 v8, s33, v8 ; 06101021 v_subrev_f32_e32 v11, s5, v0 ; 0A160005 v_subrev_f32_e32 v12, v6, v11 ; 0A181706 v_mul_f32_e32 v12, s19, v12 ; 10181813 v_subrev_f32_e32 v13, v7, v5 ; 0A1A0B07 v_mad_f32 v12, s20, v13, v12 ; D282000C 04321A14 v_subrev_f32_e32 v13, s7, v3 ; 0A1A0607 v_subrev_f32_e32 v14, v8, v13 ; 0A1C1B08 v_mad_f32 v12, s0, v14, v12 ; D282000C 04321C00 v_mad_f32 v6, s19, v12, v6 ; D2820006 041A1813 v_mad_f32 v7, s20, v12, v7 ; D2820007 041E1814 v_mad_f32 v8, s0, v12, v8 ; D2820008 04221800 v_subrev_f32_e32 v12, v6, v11 ; 0A181706 v_subrev_f32_e32 v14, v8, v13 ; 0A1C1B08 v_mul_f32_e32 v15, s19, v14 ; 101E1C13 v_mad_f32 v15, s0, v12, -v15 ; D282000F 843E1800 v_subrev_f32_e32 v16, v7, v5 ; 0A200B07 v_mul_f32_e32 v17, s0, v16 ; 10222000 v_mad_f32 v17, s20, v14, -v17 ; D2820011 84461C14 v_mul_f32_e32 v18, s20, v12 ; 10241814 v_mad_f32 v18, s19, v16, -v18 ; D2820012 844A2013 v_mul_f32_e32 v4, 0x3e22f983, v4 ; 100808FF 3E22F983 v_fract_f32_e32 v4, v4 ; 7E084104 v_sin_f32_e32 v19, v4 ; 7E266B04 v_mad_f32 v7, v19, v15, v7 ; D2820007 041E1F13 v_cos_f32_e32 v4, v4 ; 7E086D04 v_mad_f32 v7, v16, v4, v7 ; D2820007 041E0910 v_subrev_f32_e32 v5, v5, v7 ; 0A0A0F05 v_add_f32_e32 v2, v5, v2 ; 06040505 v_mul_f32_e32 v5, s12, v2 ; 100A040C v_mul_f32_e32 v7, s13, v2 ; 100E040D v_mul_f32_e32 v15, s14, v2 ; 101E040E v_mul_f32_e32 v2, s15, v2 ; 1004040F v_mad_f32 v6, v19, v17, v6 ; D2820006 041A2313 v_mad_f32 v6, v12, v4, v6 ; D2820006 041A090C v_subrev_f32_e32 v6, v11, v6 ; 0A0C0D0B v_add_f32_e32 v0, v6, v0 ; 06000106 v_mad_f32 v5, s8, v0, v5 ; D2820005 04160008 v_mad_f32 v6, s9, v0, v7 ; D2820006 041E0009 v_mad_f32 v7, s10, v0, v15 ; D2820007 043E000A v_mad_f32 v0, s11, v0, v2 ; D2820000 040A000B v_mad_f32 v2, v19, v18, v8 ; D2820002 04222513 v_mad_f32 v2, v14, v4, v2 ; D2820002 040A090E v_subrev_f32_e32 v2, v13, v2 ; 0A04050D v_add_f32_e32 v2, v2, v3 ; 06040702 v_mad_f32 v3, s16, v2, v5 ; D2820003 04160410 v_mad_f32 v4, s17, v2, v6 ; D2820004 041A0411 v_mad_f32 v5, s26, v2, v7 ; D2820005 041E041A v_mul_f32_e32 v6, 0, v9 ; 100C1280 exp 15, 33, 0, 0, 0, v9, v10, v6, v6 ; F800021F 06060A09 v_mad_f32 v0, s27, v2, v0 ; D2820000 0402041B v_mad_f32 v2, s28, v1, v3 ; D2820002 040E021C v_mad_f32 v3, s29, v1, v4 ; D2820003 0412021D v_mad_f32 v4, s30, v1, v5 ; D2820004 0416021E v_mad_f32 v0, s4, v1, v0 ; D2820000 04020204 exp 15, 12, 0, 1, 0, v2, v3, v4, v0 ; F80008CF 00040302 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 20 Code Size: 904 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..7] DCL TEMP[0..1] DCL TEMP[2], LOCAL IMM[0] FLT32 { 0.2209, 0.1138, 0.0102, 0.0000} IMM[1] FLT32 { 2.0000, 127.0000, 0.0000, -340282346638528859811704183484516925440.0000} IMM[2] FLT32 { 0.3390, 0.6780, 0.1130, 255.0000} IMM[3] FLT32 { 0.4184, 0.7319, 0.2969, 0.0039} 0: MUL TEMP[0].xy, CONST[7].xxxx, IN[0] 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, CONST[6], CONST[6].wwww 3: MAD TEMP[0].xyz, TEMP[1], TEMP[0], CONST[0] 4: DP3 TEMP[1].x, TEMP[0], IMM[0] 5: DP3 TEMP[1].y, TEMP[0], IMM[2] 6: DP3 TEMP[1].z, TEMP[0], IMM[3] 7: MAX TEMP[0].xyz, TEMP[1], IMM[0].wwww 8: RCP TEMP[0].z, TEMP[0].zzzz 9: MUL OUT[0].xy, TEMP[0].zzzz, TEMP[0] 10: LG2 TEMP[2].x, |TEMP[0].yyyy| 11: MAX TEMP[0].x, IMM[1].wwww, TEMP[2].xxxx 12: MAD TEMP[0].y, TEMP[0].xxxx, IMM[1].xxxx, IMM[1].yyyy 13: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx 14: FRC TEMP[0].x, TEMP[0].xxxx 15: MUL TEMP[0].z, TEMP[0].xxxx, IMM[2].wwww 16: MOV OUT[0].w, TEMP[0].xxxx 17: FRC TEMP[0].x, TEMP[0].zzzz 18: ADD TEMP[0].x, -TEMP[0].xxxx, TEMP[0].zzzz 19: MAD TEMP[0].x, TEMP[0].xxxx, -IMM[3].wwww, TEMP[0].yyyy 20: MUL OUT[0].z, TEMP[0].xxxx, IMM[3].wwww 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %32 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %33 = load <32 x i8>, <32 x i8> addrspace(2)* %32, align 32, !tbaa !0 %34 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %38 = fmul float %31, %36 %39 = fmul float %31, %37 %40 = bitcast float %38 to i32 %41 = bitcast float %39 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %33, <16 x i8> %35, i32 2) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = fmul float %27, %30 %49 = fmul float %28, %30 %50 = fmul float %29, %30 %51 = fmul float %48, %45 %52 = fadd float %51, %24 %53 = fmul float %49, %46 %54 = fadd float %53, %25 %55 = fmul float %50, %47 %56 = fadd float %55, %26 %57 = fmul float %52, 0x3FCC467380000000 %58 = fmul float %54, 0x3FBD21FF20000000 %59 = fadd float %58, %57 %60 = fmul float %56, 0x3F84E3BCE0000000 %61 = fadd float %59, %60 %62 = fmul float %52, 0x3FD5B22D00000000 %63 = fmul float %54, 0x3FE5B22D00000000 %64 = fadd float %63, %62 %65 = fmul float %56, 0x3FBCED9160000000 %66 = fadd float %64, %65 %67 = fmul float %52, 0x3FDAC710C0000000 %68 = fmul float %54, 0x3FE76BB980000000 %69 = fadd float %68, %67 %70 = fmul float %56, 0x3FD30068E0000000 %71 = fadd float %69, %70 %72 = call float @llvm.maxnum.f32(float %61, float 0x3EB0C6F7A0000000) %73 = call float @llvm.maxnum.f32(float %66, float 0x3EB0C6F7A0000000) %74 = call float @llvm.maxnum.f32(float %71, float 0x3EB0C6F7A0000000) %75 = fdiv float 1.000000e+00, %74 %76 = fmul float %75, %72 %77 = fmul float %75, %73 %78 = call float @fabs(float %73) %79 = call float @llvm.log2.f32(float %78) %80 = call float @llvm.maxnum.f32(float %79, float 0xC7EFFFFFE0000000) %81 = fmul float %80, 2.000000e+00 %82 = fadd float %81, 1.270000e+02 %83 = fadd float %80, %80 %84 = call float @llvm.AMDIL.fraction.(float %83) %85 = fmul float %84, 2.550000e+02 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float %85, %86 %88 = fmul float %87, 0xBF70101020000000 %89 = fadd float %88, %82 %90 = fmul float %89, 0x3F70101020000000 %91 = call i32 @llvm.SI.packf16(float %76, float %77) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %90, float %84) %94 = bitcast i32 %93 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %92, float %94, float %92, float %94) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x18 ; C2038118 s_buffer_load_dword s20, s[0:3], 0x19 ; C20A0119 s_buffer_load_dword s21, s[0:3], 0x1a ; C20A811A s_buffer_load_dword s22, s[0:3], 0x1b ; C20B011B s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s22 ; 7E020216 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mul_f32_e32 v3, s0, v0 ; 10060000 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430202 v_mov_b32_e32 v0, s22 ; 7E000216 v_mov_b32_e32 v5, s22 ; 7E0A0216 v_mul_f32_e32 v5, s7, v5 ; 100A0A07 v_mul_f32_e32 v1, s20, v1 ; 10020214 v_mul_f32_e32 v0, s21, v0 ; 10000015 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v5, v2, s4 ; D2820002 00120505 v_mad_f32 v1, v1, v3, s5 ; D2820001 00160701 v_mad_f32 v0, v0, v4, s6 ; D2820000 001A0900 v_mul_f32_e32 v3, 0x3e62339c, v2 ; 100604FF 3E62339C v_madmk_f32_e32 v3, v1, v3, 0x3de90ff9 ; 40060701 3DE90FF9 v_madmk_f32_e32 v3, v0, v3, 0x3c271de7 ; 40060700 3C271DE7 v_mul_f32_e32 v4, 0x3ead9168, v2 ; 100804FF 3EAD9168 v_madmk_f32_e32 v4, v1, v4, 0x3f2d9168 ; 40080901 3F2D9168 v_madmk_f32_e32 v4, v0, v4, 0x3de76c8b ; 40080900 3DE76C8B v_mul_f32_e32 v2, 0x3ed63886, v2 ; 100404FF 3ED63886 v_madmk_f32_e32 v1, v1, v2, 0x3f3b5dcc ; 40020501 3F3B5DCC v_madmk_f32_e32 v0, v0, v1, 0x3e980347 ; 40000300 3E980347 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_rcp_f32_e32 v0, v0 ; 7E005500 v_max_f32_e32 v1, 0x358637bd, v3 ; 200206FF 358637BD v_max_f32_e32 v2, 0x358637bd, v4 ; 200408FF 358637BD v_log_f32_e64 v3, |v2| ; D34E0103 00000102 v_mul_f32_e32 v1, v1, v0 ; 10020101 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mov_b32_e32 v2, 0xff7fffff ; 7E0402FF FF7FFFFF v_max_f32_e32 v2, v3, v2 ; 20040503 v_madak_f32_e32 v3, 2.0, v2, 0x42fe0000 ; 420604F4 42FE0000 v_add_f32_e32 v4, v2, v2 ; 06080502 v_floor_f32_e32 v4, v4 ; 7E084904 v_mad_f32 v2, 2.0, v2, -v4 ; D2820002 841204F4 v_mov_b32_e32 v4, 0x437f0000 ; 7E0802FF 437F0000 v_mul_f32_e32 v5, 0x437f0000, v2 ; 100A04FF 437F0000 v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v5, v2, v4, -v5 ; D2820005 84160902 v_mad_f32 v4, v2, v4, -v5 ; D2820004 84160902 v_madmk_f32_e32 v3, v4, v3, 0xbb808081 ; 40060704 BB808081 v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_cvt_pkrtz_f16_f32_e32 v1, v3, v2 ; 5E020503 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 368 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1].xyz, COLOR DCL OUT[2], COLOR[1] DCL OUT[3], TEXCOORD[0] DCL OUT[4], TEXCOORD[1] DCL CONST[0..10] DCL TEMP[0..2] IMM[0] FLT32 { 0.0078, -1.0000, 1.0000, 0.0000} 0: MUL TEMP[0], CONST[7], IN[0].yyyy 1: MAD TEMP[0], CONST[6], IN[0].xxxx, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[9], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 5: MAD TEMP[1], CONST[0], TEMP[0].xxxx, TEMP[1] 6: MAD TEMP[1], CONST[2], TEMP[0].zzzz, TEMP[1] 7: MAD OUT[0], CONST[3], TEMP[0].wwww, TEMP[1] 8: MAD TEMP[0], IN[2], IMM[0].xxxx, IMM[0].yyyy 9: MUL OUT[2].w, TEMP[0].wwww, CONST[10].xxxx 10: MAD TEMP[1].xyz, IN[1], IMM[0].xxxx, IMM[0].yyyy 11: MUL TEMP[2].xyz, TEMP[0].yzxw, TEMP[1] 12: MAD TEMP[1].xyz, TEMP[0], TEMP[1].yzxw, -TEMP[2] 13: MUL TEMP[1].xyz, TEMP[0].wwww, TEMP[1] 14: MUL TEMP[2].xyz, TEMP[0].yzxw, TEMP[1] 15: MAD TEMP[2].xyz, TEMP[1].zxyw, TEMP[0].zxyw, -TEMP[2] 16: MUL TEMP[2].xyz, TEMP[0].wwww, TEMP[2] 17: MUL TEMP[2].yw, TEMP[2].yyyy, CONST[7].xxzz 18: MAD TEMP[2].xy, CONST[6].xzzw, TEMP[2].xxxx, TEMP[2].ywzw 19: MAD TEMP[2].xy, CONST[8].xzzw, TEMP[2].zzzz, TEMP[2] 20: MUL TEMP[1].zw, TEMP[1].zzzz, CONST[7].xyxz 21: MAD TEMP[1].yz, CONST[6].xxzw, TEMP[1].yyyy, TEMP[1].xzww 22: MAD TEMP[1].xy, CONST[8].xzzw, TEMP[1].xxxx, TEMP[1].yzzw 23: MOV TEMP[2].z, TEMP[1].xxxx 24: MOV OUT[2].y, TEMP[1].yyyy 25: MUL TEMP[0].yw, TEMP[0].yyyy, CONST[7].xxzz 26: MAD TEMP[0].xy, CONST[6].xzzw, TEMP[0].xxxx, TEMP[0].ywzw 27: MAD TEMP[0].xy, CONST[8].xzzw, TEMP[0].zzzz, TEMP[0] 28: MOV TEMP[2].w, TEMP[0].xxxx 29: MOV OUT[2].z, TEMP[0].yyyy 30: MOV OUT[1].xyz, TEMP[2].xzww 31: MOV OUT[2].x, TEMP[2].yyyy 32: MOV OUT[3].xy, IN[3] 33: MOV OUT[3].zw, IN[4].xyyx 34: MUL OUT[4], IMM[0].zzww, IN[5].xyxx 35: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = add i32 %5, %7 %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %62, i32 0, i32 %63) %65 = extractelement <4 x float> %64, i32 0 %66 = extractelement <4 x float> %64, i32 1 %67 = extractelement <4 x float> %64, i32 2 %68 = extractelement <4 x float> %64, i32 3 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = add i32 %5, %7 %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %77) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = fmul float %33, %51 %88 = fmul float %34, %51 %89 = fmul float %35, %51 %90 = fmul float %36, %51 %91 = fmul float %29, %50 %92 = fadd float %91, %87 %93 = fmul float %30, %50 %94 = fadd float %93, %88 %95 = fmul float %31, %50 %96 = fadd float %95, %89 %97 = fmul float %32, %50 %98 = fadd float %97, %90 %99 = fmul float %37, %52 %100 = fadd float %99, %92 %101 = fmul float %38, %52 %102 = fadd float %101, %94 %103 = fmul float %39, %52 %104 = fadd float %103, %96 %105 = fmul float %40, %52 %106 = fadd float %105, %98 %107 = fmul float %41, %53 %108 = fadd float %107, %100 %109 = fmul float %42, %53 %110 = fadd float %109, %102 %111 = fmul float %43, %53 %112 = fadd float %111, %104 %113 = fmul float %44, %53 %114 = fadd float %113, %106 %115 = fmul float %110, %17 %116 = fmul float %110, %18 %117 = fmul float %110, %19 %118 = fmul float %110, %20 %119 = fmul float %13, %108 %120 = fadd float %119, %115 %121 = fmul float %14, %108 %122 = fadd float %121, %116 %123 = fmul float %15, %108 %124 = fadd float %123, %117 %125 = fmul float %16, %108 %126 = fadd float %125, %118 %127 = fmul float %21, %112 %128 = fadd float %127, %120 %129 = fmul float %22, %112 %130 = fadd float %129, %122 %131 = fmul float %23, %112 %132 = fadd float %131, %124 %133 = fmul float %24, %112 %134 = fadd float %133, %126 %135 = fmul float %25, %114 %136 = fadd float %135, %128 %137 = fmul float %26, %114 %138 = fadd float %137, %130 %139 = fmul float %27, %114 %140 = fadd float %139, %132 %141 = fmul float %28, %114 %142 = fadd float %141, %134 %143 = fmul float %65, 0x3F80101020000000 %144 = fadd float %143, -1.000000e+00 %145 = fmul float %66, 0x3F80101020000000 %146 = fadd float %145, -1.000000e+00 %147 = fmul float %67, 0x3F80101020000000 %148 = fadd float %147, -1.000000e+00 %149 = fmul float %68, 0x3F80101020000000 %150 = fadd float %149, -1.000000e+00 %151 = fmul float %150, %45 %152 = fmul float %58, 0x3F80101020000000 %153 = fadd float %152, -1.000000e+00 %154 = fmul float %59, 0x3F80101020000000 %155 = fadd float %154, -1.000000e+00 %156 = fmul float %60, 0x3F80101020000000 %157 = fadd float %156, -1.000000e+00 %158 = fmul float %146, %153 %159 = fmul float %148, %155 %160 = fmul float %144, %157 %161 = fmul float %144, %155 %162 = fsub float %161, %158 %163 = fmul float %146, %157 %164 = fsub float %163, %159 %165 = fmul float %148, %153 %166 = fsub float %165, %160 %167 = fmul float %150, %162 %168 = fmul float %150, %164 %169 = fmul float %150, %166 %170 = fmul float %146, %167 %171 = fmul float %148, %168 %172 = fmul float %144, %169 %173 = fmul float %169, %148 %174 = fsub float %173, %170 %175 = fmul float %167, %144 %176 = fsub float %175, %171 %177 = fmul float %168, %146 %178 = fsub float %177, %172 %179 = fmul float %150, %174 %180 = fmul float %150, %176 %181 = fmul float %150, %178 %182 = fmul float %180, %33 %183 = fmul float %180, %35 %184 = fmul float %29, %179 %185 = fadd float %184, %182 %186 = fmul float %31, %179 %187 = fadd float %186, %183 %188 = fmul float %37, %181 %189 = fadd float %188, %185 %190 = fmul float %39, %181 %191 = fadd float %190, %187 %192 = fmul float %169, %33 %193 = fmul float %169, %35 %194 = fmul float %29, %168 %195 = fadd float %194, %192 %196 = fmul float %31, %168 %197 = fadd float %196, %193 %198 = fmul float %37, %167 %199 = fadd float %198, %195 %200 = fmul float %39, %167 %201 = fadd float %200, %197 %202 = fmul float %146, %33 %203 = fmul float %146, %35 %204 = fmul float %29, %144 %205 = fadd float %204, %202 %206 = fmul float %31, %144 %207 = fadd float %206, %203 %208 = fmul float %37, %148 %209 = fadd float %208, %205 %210 = fmul float %39, %148 %211 = fadd float %210, %207 %212 = fmul float %85, 0.000000e+00 %213 = fmul float %85, 0.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %189, float %199, float %209, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %191, float %201, float %211, float %151) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %73, float %74, float %80, float %79) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %85, float %86, float %212, float %213) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %136, float %138, float %140, float %142) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0x3c008081 ; 7E0202FF 3C008081 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[28:31], s[8:9], 0x10 ; C08E0910 s_load_dwordx4 s[4:7], s[8:9], 0x14 ; C0820914 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1c ; C204011C buffer_load_format_xyzw v[2:5], v0, s[12:15], 0 idxen ; E00C2000 80030200 buffer_load_format_xyzw v[6:9], v0, s[16:19], 0 idxen ; E00C2000 80040600 s_buffer_load_dword s9, s[0:3], 0x1d ; C204811D s_buffer_load_dword s10, s[0:3], 0x1e ; C205011E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[20:23], 0 idxen ; E00C2000 80050900 buffer_load_format_xyzw v[13:16], v0, s[24:27], 0 idxen ; E00C2000 80060D00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[15:18], v0, s[28:31], 0 idxen ; E00C2000 80070F00 s_buffer_load_dword s11, s[0:3], 0x1f ; C205811F s_buffer_load_dword s12, s[0:3], 0x20 ; C2060120 s_buffer_load_dword s13, s[0:3], 0x18 ; C2068118 s_buffer_load_dword s14, s[0:3], 0x19 ; C2070119 s_buffer_load_dword s15, s[0:3], 0x1a ; C207811A s_buffer_load_dword s16, s[0:3], 0x1b ; C208011B s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[17:20], v0, s[4:7], 0 idxen ; E00C2000 80011100 v_mad_f32 v0, v6, v1, -1.0 ; D2820000 03CE0306 v_mad_f32 v6, v9, v1, -1.0 ; D2820006 03CE0309 v_mad_f32 v9, v10, v1, -1.0 ; D2820009 03CE030A v_mad_f32 v10, v11, v1, -1.0 ; D282000A 03CE030B v_mad_f32 v11, v12, v1, -1.0 ; D282000B 03CE030C v_mad_f32 v7, v7, v1, -1.0 ; D2820007 03CE0307 v_mad_f32 v1, v8, v1, -1.0 ; D2820001 03CE0308 v_mul_f32_e32 v8, v0, v9 ; 10101300 v_mad_f32 v8, v6, v7, -v8 ; D2820008 84220F06 v_mul_f32_e32 v7, v7, v10 ; 100E1507 v_mad_f32 v7, v9, v1, -v7 ; D2820007 841E0309 v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mad_f32 v0, v10, v0, -v1 ; D2820000 8406010A v_mul_f32_e32 v1, s8, v9 ; 10021208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s13, v6, v1 ; D2820001 04060C0D v_mul_f32_e32 v8, v8, v11 ; 10101708 v_mul_f32_e32 v7, v7, v11 ; 100E1707 v_mul_f32_e32 v0, v0, v11 ; 10001700 v_mad_f32 v1, s12, v10, v1 ; D2820001 0406140C v_mul_f32_e32 v12, v8, v9 ; 10181308 v_mad_f32 v12, v0, v10, -v12 ; D282000C 84321500 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v19, v7, v10 ; 10261507 v_mad_f32 v19, v8, v6, -v19 ; D2820013 844E0D08 v_mul_f32_e32 v20, v0, v6 ; 10280D00 v_mad_f32 v20, v7, v9, -v20 ; D2820014 84521307 v_mul_f32_e32 v21, s8, v0 ; 102A0008 v_mad_f32 v21, s13, v7, v21 ; D2820015 04560E0D v_mul_f32_e32 v12, v12, v11 ; 1018170C v_mul_f32_e32 v19, v19, v11 ; 10261713 v_mul_f32_e32 v20, v20, v11 ; 10281714 v_mad_f32 v21, s12, v8, v21 ; D2820015 0456100C v_mul_f32_e32 v22, s8, v19 ; 102C2608 v_mad_f32 v22, s13, v12, v22 ; D2820016 045A180D s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 v_mad_f32 v22, s12, v20, v22 ; D2820016 045A280C v_mov_b32_e32 v23, 0 ; 7E2E0280 exp 15, 32, 0, 0, 0, v22, v21, v1, v23 ; F800020F 17011516 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s10, v9 ; 1002120A v_mad_f32 v1, s15, v6, v1 ; D2820001 04060C0F s_buffer_load_dword s5, s[0:3], 0x28 ; C2028128 s_buffer_load_dword s6, s[0:3], 0x23 ; C2030123 s_buffer_load_dword s7, s[0:3], 0x24 ; C2038124 s_buffer_load_dword s17, s[0:3], 0x25 ; C2088125 v_mad_f32 v1, s4, v10, v1 ; D2820001 04061404 v_mul_f32_e32 v0, s10, v0 ; 1000000A v_mad_f32 v0, s15, v7, v0 ; D2820000 04020E0F v_mad_f32 v0, s4, v8, v0 ; D2820000 04021004 v_mul_f32_e32 v6, s10, v19 ; 100C260A v_mad_f32 v6, s15, v12, v6 ; D2820006 041A180F v_mad_f32 v6, s4, v20, v6 ; D2820006 041A2804 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s5, v11 ; 100E1605 exp 15, 33, 0, 0, 0, v6, v0, v1, v7 ; F800021F 07010006 exp 15, 34, 0, 0, 0, v13, v14, v16, v15 ; F800022F 0F100E0D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s8, v3 ; 10000608 v_mad_f32 v0, s13, v2, v0 ; D2820000 0402040D s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121 v_mul_f32_e32 v1, s9, v3 ; 10020609 v_mad_f32 v1, s14, v2, v1 ; D2820001 0406040E v_mul_f32_e32 v6, s10, v3 ; 100C060A v_mad_f32 v6, s15, v2, v6 ; D2820006 041A040F s_buffer_load_dword s8, s[0:3], 0x26 ; C2040126 s_buffer_load_dword s9, s[0:3], 0x27 ; C2048127 v_mul_f32_e32 v3, s11, v3 ; 1006060B v_mad_f32 v2, s16, v2, v3 ; D2820002 040E0410 v_mad_f32 v0, s12, v4, v0 ; D2820000 0402080C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s5, v4, v1 ; D2820001 04060805 v_mad_f32 v3, s4, v4, v6 ; D2820003 041A0804 v_mad_f32 v2, s6, v4, v2 ; D2820002 040A0806 v_mad_f32 v0, s7, v5, v0 ; D2820000 04020A07 v_mad_f32 v1, s17, v5, v1 ; D2820001 04060A11 v_mad_f32 v3, s8, v5, v3 ; D2820003 040E0A08 v_mad_f32 v2, s9, v5, v2 ; D2820002 040A0A09 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s8, s[0:3], 0x3 ; C2040103 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105 s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106 s_buffer_load_dword s12, s[0:3], 0x7 ; C2060107 s_buffer_load_dword s13, s[0:3], 0x8 ; C2068108 s_buffer_load_dword s14, s[0:3], 0x9 ; C2070109 s_buffer_load_dword s15, s[0:3], 0xa ; C207810A s_buffer_load_dword s16, s[0:3], 0xb ; C208010B s_buffer_load_dword s17, s[0:3], 0xc ; C208810C s_buffer_load_dword s18, s[0:3], 0xd ; C209010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s9, v1 ; 10080209 v_mul_f32_e32 v5, s10, v1 ; 100A020A v_mul_f32_e32 v6, s11, v1 ; 100C020B v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 v_mad_f32 v5, s6, v0, v5 ; D2820005 04160006 v_mad_f32 v6, s7, v0, v6 ; D2820006 041A0007 v_mad_f32 v0, s8, v0, v1 ; D2820000 04060008 v_mad_f32 v1, s13, v3, v4 ; D2820001 0412060D v_mad_f32 v4, s14, v3, v5 ; D2820004 0416060E v_mad_f32 v5, s15, v3, v6 ; D2820005 041A060F v_mul_f32_e32 v6, 0, v17 ; 100C2280 exp 15, 35, 0, 0, 0, v17, v18, v6, v6 ; F800023F 06061211 v_mad_f32 v0, s16, v3, v0 ; D2820000 04020610 v_mad_f32 v1, s17, v2, v1 ; D2820001 04060411 v_mad_f32 v3, s18, v2, v4 ; D2820003 04120412 v_mad_f32 v4, s0, v2, v5 ; D2820004 04160400 v_mad_f32 v0, s4, v2, v0 ; D2820000 04020404 exp 15, 12, 0, 1, 0, v1, v3, v4, v0 ; F80008CF 00040301 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 24 Code Size: 816 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..6] DCL TEMP[0..1] DCL TEMP[2], LOCAL IMM[0] FLT32 { 0.2209, 0.1138, 0.0102, 0.0000} IMM[1] FLT32 { 2.0000, 127.0000, 0.0000, -340282346638528859811704183484516925440.0000} IMM[2] FLT32 { 0.3390, 0.6780, 0.1130, 255.0000} IMM[3] FLT32 { 0.4184, 0.7319, 0.2969, 0.0039} 0: TEX TEMP[0], IN[0], SAMP[0], 2D 1: MUL TEMP[1].xyz, CONST[6], CONST[6].wwww 2: MAD TEMP[0].xyz, TEMP[0], TEMP[1], CONST[0] 3: DP3 TEMP[1].x, TEMP[0], IMM[0] 4: DP3 TEMP[1].y, TEMP[0], IMM[2] 5: DP3 TEMP[1].z, TEMP[0], IMM[3] 6: MAX TEMP[0].xyz, TEMP[1], IMM[0].wwww 7: RCP TEMP[0].z, TEMP[0].zzzz 8: MUL OUT[0].xy, TEMP[0].zzzz, TEMP[0] 9: LG2 TEMP[2].x, |TEMP[0].yyyy| 10: MAX TEMP[0].x, IMM[1].wwww, TEMP[2].xxxx 11: MAD TEMP[0].y, TEMP[0].xxxx, IMM[1].xxxx, IMM[1].yyyy 12: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx 13: FRC TEMP[0].x, TEMP[0].xxxx 14: MUL TEMP[0].z, TEMP[0].xxxx, IMM[2].wwww 15: MOV OUT[0].w, TEMP[0].xxxx 16: FRC TEMP[0].x, TEMP[0].zzzz 17: ADD TEMP[0].x, -TEMP[0].xxxx, TEMP[0].zzzz 18: MAD TEMP[0].x, TEMP[0].xxxx, -IMM[3].wwww, TEMP[0].yyyy 19: MUL OUT[0].z, TEMP[0].xxxx, IMM[3].wwww 20: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %32, <16 x i8> %34, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = fmul float %27, %30 %46 = fmul float %28, %30 %47 = fmul float %29, %30 %48 = fmul float %42, %45 %49 = fadd float %48, %24 %50 = fmul float %43, %46 %51 = fadd float %50, %25 %52 = fmul float %44, %47 %53 = fadd float %52, %26 %54 = fmul float %49, 0x3FCC467380000000 %55 = fmul float %51, 0x3FBD21FF20000000 %56 = fadd float %55, %54 %57 = fmul float %53, 0x3F84E3BCE0000000 %58 = fadd float %56, %57 %59 = fmul float %49, 0x3FD5B22D00000000 %60 = fmul float %51, 0x3FE5B22D00000000 %61 = fadd float %60, %59 %62 = fmul float %53, 0x3FBCED9160000000 %63 = fadd float %61, %62 %64 = fmul float %49, 0x3FDAC710C0000000 %65 = fmul float %51, 0x3FE76BB980000000 %66 = fadd float %65, %64 %67 = fmul float %53, 0x3FD30068E0000000 %68 = fadd float %66, %67 %69 = call float @llvm.maxnum.f32(float %58, float 0x3EB0C6F7A0000000) %70 = call float @llvm.maxnum.f32(float %63, float 0x3EB0C6F7A0000000) %71 = call float @llvm.maxnum.f32(float %68, float 0x3EB0C6F7A0000000) %72 = fdiv float 1.000000e+00, %71 %73 = fmul float %72, %69 %74 = fmul float %72, %70 %75 = call float @fabs(float %70) %76 = call float @llvm.log2.f32(float %75) %77 = call float @llvm.maxnum.f32(float %76, float 0xC7EFFFFFE0000000) %78 = fmul float %77, 2.000000e+00 %79 = fadd float %78, 1.270000e+02 %80 = fadd float %77, %77 %81 = call float @llvm.AMDIL.fraction.(float %80) %82 = fmul float %81, 2.550000e+02 %83 = call float @llvm.AMDIL.fraction.(float %82) %84 = fsub float %82, %83 %85 = fmul float %84, 0xBF70101020000000 %86 = fadd float %85, %79 %87 = fmul float %86, 0x3F70101020000000 %88 = call i32 @llvm.SI.packf16(float %73, float %74) %89 = bitcast i32 %88 to float %90 = call i32 @llvm.SI.packf16(float %87, float %81) %91 = bitcast i32 %90 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %89, float %91, float %89, float %91) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0xff7fffff ; 7E0402FF FF7FFFFF v_mov_b32_e32 v3, 0x437f0000 ; 7E0602FF 437F0000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s5, s[0:3], 0x1b ; C202811B s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s7, s[0:3], 0x19 ; C2038119 s_buffer_load_dword s20, s[0:3], 0x0 ; C20A0100 s_buffer_load_dword s21, s[0:3], 0x1 ; C20A8101 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800700 00430404 v_mov_b32_e32 v0, s5 ; 7E000205 v_mul_f32_e32 v0, s6, v0 ; 10000006 v_mov_b32_e32 v1, s5 ; 7E020205 v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mov_b32_e32 v7, s5 ; 7E0E0205 v_mul_f32_e32 v7, s4, v7 ; 100E0E04 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v4, v0, s20 ; D2820000 00520104 v_mad_f32 v1, v5, v1, s21 ; D2820001 00560305 v_mad_f32 v4, v6, v7, s0 ; D2820004 00020F06 v_mul_f32_e32 v5, 0x3e62339c, v0 ; 100A00FF 3E62339C v_madmk_f32_e32 v5, v1, v5, 0x3de90ff9 ; 400A0B01 3DE90FF9 v_mul_f32_e32 v6, 0x3ead9168, v0 ; 100C00FF 3EAD9168 v_madmk_f32_e32 v6, v1, v6, 0x3f2d9168 ; 400C0D01 3F2D9168 v_mul_f32_e32 v0, 0x3ed63886, v0 ; 100000FF 3ED63886 v_madmk_f32_e32 v0, v1, v0, 0x3f3b5dcc ; 40000101 3F3B5DCC v_madmk_f32_e32 v1, v4, v5, 0x3c271de7 ; 40020B04 3C271DE7 v_madmk_f32_e32 v5, v4, v6, 0x3de76c8b ; 400A0D04 3DE76C8B v_madmk_f32_e32 v0, v4, v0, 0x3e980347 ; 40000104 3E980347 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_rcp_f32_e32 v0, v0 ; 7E005500 v_max_f32_e32 v4, 0x358637bd, v5 ; 20080AFF 358637BD v_log_f32_e64 v5, |v4| ; D34E0105 00000104 v_max_f32_e32 v1, 0x358637bd, v1 ; 200202FF 358637BD v_mul_f32_e32 v1, v1, v0 ; 10020101 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_max_f32_e32 v2, v5, v2 ; 20040505 v_madak_f32_e32 v4, 2.0, v2, 0x42fe0000 ; 420804F4 42FE0000 v_add_f32_e32 v5, v2, v2 ; 060A0502 v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v2, 2.0, v2, -v5 ; D2820002 841604F4 v_mul_f32_e32 v5, 0x437f0000, v2 ; 100A04FF 437F0000 v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v5, v2, v3, -v5 ; D2820005 84160702 v_mad_f32 v3, v2, v3, -v5 ; D2820003 84160702 v_madmk_f32_e32 v3, v3, v4, 0xbb808081 ; 40060903 BB808081 v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_cvt_pkrtz_f16_f32_e32 v1, v3, v2 ; 5E020503 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 356 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[1], PERSPECTIVE DCL IN[4], TEXCOORD[4], PERSPECTIVE DCL IN[5], TEXCOORD[5], PERSPECTIVE DCL IN[6], TEXCOORD[6], PERSPECTIVE DCL IN[7], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..9] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[1], IN[1] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[1], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[0], IN[0] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[0], TEMP[0].xxxx 8: MUL TEMP[3].xyz, TEMP[1].zxyw, TEMP[2].yzxw 9: MAD TEMP[3].xyz, TEMP[1].yzxw, TEMP[2].zxyw, -TEMP[3] 10: MUL TEMP[3].xyz, TEMP[3], IN[1].wwww 11: DP3 TEMP[0].x, IN[6], IN[6] 12: RSQ TEMP[0].x, TEMP[0].xxxx 13: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 14: MUL TEMP[4].xyz, IN[6], TEMP[0].xxxx 15: DP3 TEMP[0].x, IN[4], IN[4] 16: RSQ TEMP[0].x, TEMP[0].xxxx 17: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 18: MUL TEMP[5].xyz, IN[4], TEMP[0].xxxx 19: TEX TEMP[6], IN[3], SAMP[2], 2D 20: MAD TEMP[6].xyz, TEMP[6], IMM[0].xxxx, IMM[0].yyyy 21: DP3 TEMP[0].x, TEMP[6], TEMP[6] 22: RSQ TEMP[0].x, TEMP[0].xxxx 23: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 24: MUL TEMP[7].xyz, TEMP[6], TEMP[0].xxxx 25: DP3 TEMP[1].w, TEMP[7], TEMP[4] 26: MUL TEMP[6].xyz, TEMP[1].wwww, TEMP[7] 27: MAD TEMP[4].xyz, TEMP[6], IMM[0].xxxx, -TEMP[4] 28: TEX TEMP[6], IN[2], SAMP[6], 2D 29: ADD TEMP[1].w, TEMP[6].xxxx, CONST[10].xxxx 30: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[10].yyyy 31: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 32: POW TEMP[3].w, |TEMP[1].wwww|, CONST[10].zzzz 33: CMP TEMP[6].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].wwww 34: DP3 TEMP[1].w, IN[5], IN[5] 35: RSQ TEMP[0], |TEMP[1].wwww| 36: MIN TEMP[2].w, IMM[1].wwww, TEMP[0] 37: MUL TEMP[8].xyz, TEMP[2].wwww, IN[5] 38: DP3 TEMP[2].w, TEMP[8], -CONST[11] 39: ADD TEMP[2].w, TEMP[2].wwww, -CONST[12].xxxx 40: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[12].yyyy 41: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 42: ADD TEMP[1].w, -TEMP[1].wwww, -IMM[0].yyyy 43: MUL TEMP[3].w, TEMP[6].zzzz, TEMP[1].wwww 44: MUL TEMP[8], TEMP[2].wwww, TEMP[3].wwww 45: MOV TEMP[9], TEMP[8] 46: KILL_IF TEMP[9] 47: UIF CONST[240].xxxx :53 48: RCP TEMP[3].w, IN[7].wwww 49: MUL TEMP[8].xy, TEMP[3].wwww, IN[7] 50: MAD TEMP[8].xy, TEMP[8], CONST[1], CONST[1].wzzw 51: TEX TEMP[9], TEMP[8], SAMP[0], 2D 52: MUL TEMP[6].xyz, TEMP[6].zzzz, TEMP[9] 53: ENDIF 54: MOV TEMP[8].y, IMM[0].yyyy 55: ADD TEMP[8].xyz, -TEMP[8].yyyy, -CONST[0] 56: TEX TEMP[9], IN[3], SAMP[3], 2D 57: MUL TEMP[9].xyw, CONST[6].xyzz, CONST[6].wwww 58: DP3 TEMP[2].x, TEMP[2], TEMP[4] 59: DP3 TEMP[2].y, TEMP[3], TEMP[4] 60: DP3 TEMP[2].z, TEMP[1], TEMP[4] 61: TEX TEMP[3], TEMP[2], SAMP[1], CUBE 62: MUL TEMP[1].xyz, TEMP[3], TEMP[9].xyww 63: TEX TEMP[3], IN[3], SAMP[4], 2D 64: MAD TEMP[1].xyz, TEMP[9].zzzz, TEMP[1], TEMP[3] 65: MUL TEMP[1].xyz, TEMP[8], TEMP[1] 66: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 67: MAX TEMP[2].x, TEMP[1].wwww, IMM[0].wwww 68: ADD TEMP[1].w, TEMP[2].xxxx, IMM[0].zzzz 69: POW TEMP[3].x, |TEMP[2].xxxx|, CONST[9].wwww 70: CMP TEMP[1].w, TEMP[1].wwww, IMM[0].wwww, TEMP[3].xxxx 71: MUL TEMP[2].xyz, CONST[7], CONST[7].wwww 72: TEX TEMP[3], IN[3], SAMP[5], 2D 73: MUL TEMP[2].xyz, TEMP[2], TEMP[3].xxxx 74: MAD TEMP[2].xyz, TEMP[2], CONST[5].wwww, CONST[5] 75: DP3_SAT TEMP[3].x, TEMP[7], TEMP[5] 76: DP3_SAT TEMP[3].z, TEMP[4], TEMP[5] 77: ADD TEMP[3].yw, TEMP[3].xxzz, IMM[0].zzzz 78: POW TEMP[4].x, |TEMP[3].zzzz|, CONST[8].xxxx 79: MOV TEMP[5].x, CONST[8].xxxx 80: ADD TEMP[3].z, TEMP[5].xxxx, IMM[1].xxxx 81: MUL TEMP[3].z, TEMP[3].zzzz, TEMP[4].xxxx 82: MUL TEMP[3].z, TEMP[3].zzzz, IMM[1].yyyy 83: MUL TEMP[1].xyz, TEMP[1], TEMP[3].xxxx 84: CMP TEMP[1].xyz, TEMP[3].yyyy, IMM[0].wwww, TEMP[1] 85: MUL TEMP[2].xyz, TEMP[2], TEMP[3].zzzz 86: CMP TEMP[2].xyz, TEMP[3].wwww, IMM[0].wwww, TEMP[2] 87: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 88: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 89: MUL TEMP[1].xyz, TEMP[6], TEMP[1] 90: MUL TEMP[1].xyz, TEMP[1], CONST[9] 91: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 92: CMP OUT[0].xyz, -TEMP[8].wwww, TEMP[1], IMM[0].wwww 93: MOV OUT[0].w, IMM[0].wwww 94: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %57 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %58 = load <32 x i8>, <32 x i8> addrspace(2)* %57, align 32, !tbaa !0 %59 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %74 = bitcast <8 x i32> addrspace(2)* %73 to <32 x i8> addrspace(2)* %75 = load <32 x i8>, <32 x i8> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %77 = bitcast <4 x i32> addrspace(2)* %76 to <16 x i8> addrspace(2)* %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %80 = bitcast <8 x i32> addrspace(2)* %79 to <32 x i8> addrspace(2)* %81 = load <32 x i8>, <32 x i8> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %83 = bitcast <4 x i32> addrspace(2)* %82 to <16 x i8> addrspace(2)* %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %86 = bitcast <8 x i32> addrspace(2)* %85 to <32 x i8> addrspace(2)* %87 = load <32 x i8>, <32 x i8> addrspace(2)* %86, align 32, !tbaa !0 %88 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %89 = bitcast <4 x i32> addrspace(2)* %88 to <16 x i8> addrspace(2)* %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %92 = bitcast <8 x i32> addrspace(2)* %91 to <32 x i8> addrspace(2)* %93 = load <32 x i8>, <32 x i8> addrspace(2)* %92, align 32, !tbaa !0 %94 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %95 = bitcast <4 x i32> addrspace(2)* %94 to <16 x i8> addrspace(2)* %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %117 = fmul float %100, %100 %118 = fmul float %101, %101 %119 = fadd float %118, %117 %120 = fmul float %102, %102 %121 = fadd float %119, %120 %122 = call float @llvm.AMDGPU.rsq.clamped.f32(float %121) %123 = call float @llvm.minnum.f32(float %122, float 0x47EFFFFFE0000000) %124 = fmul float %100, %123 %125 = fmul float %101, %123 %126 = fmul float %102, %123 %127 = fmul float %97, %97 %128 = fmul float %98, %98 %129 = fadd float %128, %127 %130 = fmul float %99, %99 %131 = fadd float %129, %130 %132 = call float @llvm.AMDGPU.rsq.clamped.f32(float %131) %133 = call float @llvm.minnum.f32(float %132, float 0x47EFFFFFE0000000) %134 = fmul float %97, %133 %135 = fmul float %98, %133 %136 = fmul float %99, %133 %137 = fmul float %126, %135 %138 = fmul float %124, %136 %139 = fmul float %125, %134 %140 = fmul float %125, %136 %141 = fsub float %140, %137 %142 = fmul float %126, %134 %143 = fsub float %142, %138 %144 = fmul float %124, %135 %145 = fsub float %144, %139 %146 = fmul float %141, %103 %147 = fmul float %143, %103 %148 = fmul float %145, %103 %149 = fmul float %114, %114 %150 = fmul float %115, %115 %151 = fadd float %150, %149 %152 = fmul float %116, %116 %153 = fadd float %151, %152 %154 = call float @llvm.AMDGPU.rsq.clamped.f32(float %153) %155 = call float @llvm.minnum.f32(float %154, float 0x47EFFFFFE0000000) %156 = fmul float %114, %155 %157 = fmul float %115, %155 %158 = fmul float %116, %155 %159 = fmul float %108, %108 %160 = fmul float %109, %109 %161 = fadd float %160, %159 %162 = fmul float %110, %110 %163 = fadd float %161, %162 %164 = call float @llvm.AMDGPU.rsq.clamped.f32(float %163) %165 = call float @llvm.minnum.f32(float %164, float 0x47EFFFFFE0000000) %166 = fmul float %108, %165 %167 = fmul float %109, %165 %168 = fmul float %110, %165 %169 = bitcast float %106 to i32 %170 = bitcast float %107 to i32 %171 = insertelement <2 x i32> undef, i32 %169, i32 0 %172 = insertelement <2 x i32> %171, i32 %170, i32 1 %173 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %172, <32 x i8> %69, <16 x i8> %72, i32 2) %174 = extractelement <4 x float> %173, i32 0 %175 = extractelement <4 x float> %173, i32 1 %176 = extractelement <4 x float> %173, i32 2 %177 = fmul float %174, 2.000000e+00 %178 = fadd float %177, -1.000000e+00 %179 = fmul float %175, 2.000000e+00 %180 = fadd float %179, -1.000000e+00 %181 = fmul float %176, 2.000000e+00 %182 = fadd float %181, -1.000000e+00 %183 = fmul float %178, %178 %184 = fmul float %180, %180 %185 = fadd float %184, %183 %186 = fmul float %182, %182 %187 = fadd float %185, %186 %188 = call float @llvm.AMDGPU.rsq.clamped.f32(float %187) %189 = call float @llvm.minnum.f32(float %188, float 0x47EFFFFFE0000000) %190 = fmul float %178, %189 %191 = fmul float %180, %189 %192 = fmul float %182, %189 %193 = fmul float %190, %156 %194 = fmul float %191, %157 %195 = fadd float %194, %193 %196 = fmul float %192, %158 %197 = fadd float %195, %196 %198 = fmul float %197, %190 %199 = fmul float %197, %191 %200 = fmul float %197, %192 %201 = fmul float %198, 2.000000e+00 %202 = fsub float %201, %156 %203 = fmul float %199, 2.000000e+00 %204 = fsub float %203, %157 %205 = fmul float %200, 2.000000e+00 %206 = fsub float %205, %158 %207 = bitcast float %104 to i32 %208 = bitcast float %105 to i32 %209 = insertelement <2 x i32> undef, i32 %207, i32 0 %210 = insertelement <2 x i32> %209, i32 %208, i32 1 %211 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %210, <32 x i8> %93, <16 x i8> %96, i32 2) %212 = extractelement <4 x float> %211, i32 0 %213 = fadd float %212, %48 %214 = fmul float %213, %49 %215 = call float @llvm.AMDIL.clamp.(float %214, float 0.000000e+00, float 1.000000e+00) %216 = fadd float %215, 0xBEB0C6F7A0000000 %217 = call float @fabs(float %215) %218 = call float @llvm.pow.f32(float %217, float %50) %219 = call float @llvm.AMDGPU.cndlt(float %216, float 0.000000e+00, float %218) %220 = call float @llvm.AMDGPU.cndlt(float %216, float 0.000000e+00, float %218) %221 = call float @llvm.AMDGPU.cndlt(float %216, float 0.000000e+00, float %218) %222 = fmul float %111, %111 %223 = fmul float %112, %112 %224 = fadd float %223, %222 %225 = fmul float %113, %113 %226 = fadd float %224, %225 %227 = call float @fabs(float %226) %228 = call float @llvm.AMDGPU.rsq.clamped.f32(float %227) %229 = call float @llvm.minnum.f32(float %228, float 0x47EFFFFFE0000000) %230 = fmul float %229, %111 %231 = fmul float %229, %112 %232 = fmul float %229, %113 %233 = fmul float %51, %230 %234 = fsub float -0.000000e+00, %233 %235 = fmul float %52, %231 %236 = fsub float %234, %235 %237 = fmul float %53, %232 %238 = fsub float %236, %237 %239 = fsub float %238, %54 %240 = fmul float %239, %55 %241 = call float @llvm.AMDIL.clamp.(float %240, float 0.000000e+00, float 1.000000e+00) %242 = fmul float %241, %241 %243 = fsub float 1.000000e+00, %226 %244 = fmul float %221, %243 %245 = fmul float %242, %244 %246 = fmul float %242, %244 %247 = fmul float %242, %244 %248 = fmul float %242, %244 %249 = fcmp olt float %245, 0.000000e+00 %250 = fcmp olt float %246, 0.000000e+00 %251 = fcmp olt float %247, 0.000000e+00 %252 = fcmp olt float %248, 0.000000e+00 %253 = or i1 %252, %251 %254 = or i1 %253, %250 %255 = or i1 %254, %249 %256 = select i1 %255, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %256) %257 = bitcast float %56 to i32 %258 = icmp eq i32 %257, 0 br i1 %258, label %ENDIF, label %IF IF: ; preds = %main_body %259 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %5, <2 x i32> %7) %260 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %261 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %262 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %263 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %264 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %265 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %266 = fdiv float 1.000000e+00, %259 %267 = fmul float %266, %261 %268 = fmul float %266, %260 %269 = fmul float %267, %265 %270 = fadd float %269, %262 %271 = fmul float %268, %264 %272 = fadd float %271, %263 %273 = bitcast float %270 to i32 %274 = bitcast float %272 to i32 %275 = insertelement <2 x i32> undef, i32 %273, i32 0 %276 = insertelement <2 x i32> %275, i32 %274, i32 1 %277 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %276, <32 x i8> %58, <16 x i8> %60, i32 2) %278 = extractelement <4 x float> %277, i32 0 %279 = extractelement <4 x float> %277, i32 1 %280 = extractelement <4 x float> %277, i32 2 %281 = fmul float %221, %278 %282 = fmul float %221, %279 %283 = fmul float %221, %280 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp24.0 = phi float [ %281, %IF ], [ %219, %main_body ] %temp25.0 = phi float [ %282, %IF ], [ %220, %main_body ] %temp26.0 = phi float [ %283, %IF ], [ %221, %main_body ] %284 = fsub float 1.000000e+00, %24 %285 = fsub float 1.000000e+00, %25 %286 = fsub float 1.000000e+00, %26 %287 = bitcast float %106 to i32 %288 = bitcast float %107 to i32 %289 = insertelement <2 x i32> undef, i32 %287, i32 0 %290 = insertelement <2 x i32> %289, i32 %288, i32 1 %291 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %290, <32 x i8> %75, <16 x i8> %78, i32 2) %292 = extractelement <4 x float> %291, i32 2 %293 = fmul float %35, %38 %294 = fmul float %36, %38 %295 = fmul float %37, %38 %296 = fmul float %134, %202 %297 = fmul float %135, %204 %298 = fadd float %297, %296 %299 = fmul float %136, %206 %300 = fadd float %298, %299 %301 = fmul float %146, %202 %302 = fmul float %147, %204 %303 = fadd float %302, %301 %304 = fmul float %148, %206 %305 = fadd float %303, %304 %306 = fmul float %124, %202 %307 = fmul float %125, %204 %308 = fadd float %307, %306 %309 = fmul float %126, %206 %310 = fadd float %308, %309 %311 = insertelement <4 x float> undef, float %300, i32 0 %312 = insertelement <4 x float> %311, float %305, i32 1 %313 = insertelement <4 x float> %312, float %310, i32 2 %314 = insertelement <4 x float> %313, float %242, i32 3 %315 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %314) %316 = extractelement <4 x float> %315, i32 0 %317 = extractelement <4 x float> %315, i32 1 %318 = extractelement <4 x float> %315, i32 2 %319 = extractelement <4 x float> %315, i32 3 %320 = call float @fabs(float %318) %321 = fdiv float 1.000000e+00, %320 %322 = fmul float %316, %321 %323 = fadd float %322, 1.500000e+00 %324 = fmul float %317, %321 %325 = fadd float %324, 1.500000e+00 %326 = bitcast float %325 to i32 %327 = bitcast float %323 to i32 %328 = bitcast float %319 to i32 %329 = insertelement <4 x i32> undef, i32 %326, i32 0 %330 = insertelement <4 x i32> %329, i32 %327, i32 1 %331 = insertelement <4 x i32> %330, i32 %328, i32 2 %332 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %331, <32 x i8> %63, <16 x i8> %66, i32 4) %333 = extractelement <4 x float> %332, i32 0 %334 = extractelement <4 x float> %332, i32 1 %335 = extractelement <4 x float> %332, i32 2 %336 = fmul float %333, %293 %337 = fmul float %334, %294 %338 = fmul float %335, %295 %339 = bitcast float %106 to i32 %340 = bitcast float %107 to i32 %341 = insertelement <2 x i32> undef, i32 %339, i32 0 %342 = insertelement <2 x i32> %341, i32 %340, i32 1 %343 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %342, <32 x i8> %81, <16 x i8> %84, i32 2) %344 = extractelement <4 x float> %343, i32 0 %345 = extractelement <4 x float> %343, i32 1 %346 = extractelement <4 x float> %343, i32 2 %347 = fmul float %292, %336 %348 = fadd float %347, %344 %349 = fmul float %292, %337 %350 = fadd float %349, %345 %351 = fmul float %292, %338 %352 = fadd float %351, %346 %353 = fmul float %284, %348 %354 = fmul float %285, %350 %355 = fmul float %286, %352 %356 = fmul float %353, %30 %357 = fadd float %356, %27 %358 = fmul float %354, %30 %359 = fadd float %358, %28 %360 = fmul float %355, %30 %361 = fadd float %360, %29 %362 = call float @llvm.maxnum.f32(float %243, float 0.000000e+00) %363 = fadd float %362, 0xBEB0C6F7A0000000 %364 = call float @fabs(float %362) %365 = call float @llvm.pow.f32(float %364, float %47) %366 = call float @llvm.AMDGPU.cndlt(float %363, float 0.000000e+00, float %365) %367 = fmul float %39, %42 %368 = fmul float %40, %42 %369 = fmul float %41, %42 %370 = bitcast float %106 to i32 %371 = bitcast float %107 to i32 %372 = insertelement <2 x i32> undef, i32 %370, i32 0 %373 = insertelement <2 x i32> %372, i32 %371, i32 1 %374 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %373, <32 x i8> %87, <16 x i8> %90, i32 2) %375 = extractelement <4 x float> %374, i32 0 %376 = fmul float %367, %375 %377 = fmul float %368, %375 %378 = fmul float %369, %375 %379 = fmul float %376, %34 %380 = fadd float %379, %31 %381 = fmul float %377, %34 %382 = fadd float %381, %32 %383 = fmul float %378, %34 %384 = fadd float %383, %33 %385 = fmul float %190, %166 %386 = fmul float %191, %167 %387 = fadd float %386, %385 %388 = fmul float %192, %168 %389 = fadd float %387, %388 %390 = call float @llvm.AMDIL.clamp.(float %389, float 0.000000e+00, float 1.000000e+00) %391 = fmul float %202, %166 %392 = fmul float %204, %167 %393 = fadd float %392, %391 %394 = fmul float %206, %168 %395 = fadd float %393, %394 %396 = call float @llvm.AMDIL.clamp.(float %395, float 0.000000e+00, float 1.000000e+00) %397 = fadd float %390, 0xBEB0C6F7A0000000 %398 = fadd float %396, 0xBEB0C6F7A0000000 %399 = call float @fabs(float %396) %400 = call float @llvm.pow.f32(float %399, float %43) %401 = fadd float %43, 8.000000e+00 %402 = fmul float %401, %400 %403 = fmul float %402, 0x3FA45F3060000000 %404 = fmul float %357, %390 %405 = fmul float %359, %390 %406 = fmul float %361, %390 %407 = call float @llvm.AMDGPU.cndlt(float %397, float 0.000000e+00, float %404) %408 = call float @llvm.AMDGPU.cndlt(float %397, float 0.000000e+00, float %405) %409 = call float @llvm.AMDGPU.cndlt(float %397, float 0.000000e+00, float %406) %410 = fmul float %380, %403 %411 = fmul float %382, %403 %412 = fmul float %384, %403 %413 = call float @llvm.AMDGPU.cndlt(float %398, float 0.000000e+00, float %410) %414 = call float @llvm.AMDGPU.cndlt(float %398, float 0.000000e+00, float %411) %415 = call float @llvm.AMDGPU.cndlt(float %398, float 0.000000e+00, float %412) %416 = fadd float %407, %413 %417 = fadd float %408, %414 %418 = fadd float %409, %415 %419 = fmul float %366, %416 %420 = fmul float %366, %417 %421 = fmul float %366, %418 %422 = fmul float %temp24.0, %419 %423 = fmul float %temp25.0, %420 %424 = fmul float %temp26.0, %421 %425 = fmul float %422, %44 %426 = fmul float %423, %45 %427 = fmul float %424, %46 %428 = fmul float %242, %425 %429 = fmul float %242, %426 %430 = fmul float %242, %427 %431 = fsub float -0.000000e+00, %248 %432 = call float @llvm.AMDGPU.cndlt(float %431, float %428, float 0.000000e+00) %433 = fsub float -0.000000e+00, %248 %434 = call float @llvm.AMDGPU.cndlt(float %433, float %429, float 0.000000e+00) %435 = fsub float -0.000000e+00, %248 %436 = call float @llvm.AMDGPU.cndlt(float %435, float %430, float 0.000000e+00) %437 = call i32 @llvm.SI.packf16(float %432, float %434) %438 = bitcast i32 %437 to float %439 = call i32 @llvm.SI.packf16(float %436, float 0.000000e+00) %440 = bitcast i32 %439 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %438, float %440, float %438, float %440) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_movk_i32 s32, 0xf00 ; B0200F00 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 v_interp_p1_f32 v9, v0, 0, 0, [m0] ; C8240000 v_interp_p2_f32 v9, [v9], v1, 0, 0, [m0] ; C8250001 v_interp_p1_f32 v10, v0, 1, 0, [m0] ; C8280100 v_interp_p2_f32 v10, [v10], v1, 1, 0, [m0] ; C8290101 v_interp_p1_f32 v11, v0, 2, 0, [m0] ; C82C0200 v_interp_p2_f32 v11, [v11], v1, 2, 0, [m0] ; C82D0201 v_interp_p1_f32 v12, v0, 0, 1, [m0] ; C8300400 v_interp_p2_f32 v12, [v12], v1, 0, 1, [m0] ; C8310401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s33, s[0:3], 0x29 ; C2108129 s_buffer_load_dword s34, s[0:3], 0x2a ; C211012A s_buffer_load_dword s35, s[0:3], 0x2c ; C211812C s_buffer_load_dword s36, s[0:3], 0x2d ; C212012D s_buffer_load_dword s37, s[0:3], 0x2e ; C212812E v_interp_p1_f32 v14, v0, 1, 1, [m0] ; C8380500 v_interp_p2_f32 v14, [v14], v1, 1, 1, [m0] ; C8390501 v_interp_p1_f32 v15, v0, 2, 1, [m0] ; C83C0600 v_interp_p2_f32 v15, [v15], v1, 2, 1, [m0] ; C83D0601 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 s_buffer_load_dword s38, s[0:3], 0x28 ; C2130128 v_interp_p1_f32 v20, v0, 0, 2, [m0] ; C8500800 v_interp_p2_f32 v20, [v20], v1, 0, 2, [m0] ; C8510801 v_interp_p1_f32 v21, v0, 1, 2, [m0] ; C8540900 v_interp_p2_f32 v21, [v21], v1, 1, 2, [m0] ; C8550901 v_interp_p1_f32 v3, v0, 0, 3, [m0] ; C80C0C00 v_interp_p2_f32 v3, [v3], v1, 0, 3, [m0] ; C80D0C01 v_interp_p1_f32 v4, v0, 1, 3, [m0] ; C8100D00 v_interp_p2_f32 v4, [v4], v1, 1, 3, [m0] ; C8110D01 v_interp_p1_f32 v17, v0, 0, 4, [m0] ; C8441000 v_interp_p2_f32 v17, [v17], v1, 0, 4, [m0] ; C8451001 v_interp_p1_f32 v18, v0, 1, 4, [m0] ; C8481100 v_interp_p2_f32 v18, [v18], v1, 1, 4, [m0] ; C8491101 v_interp_p1_f32 v2, v0, 2, 4, [m0] ; C8081200 v_interp_p2_f32 v2, [v2], v1, 2, 4, [m0] ; C8091201 v_interp_p1_f32 v13, v0, 0, 5, [m0] ; C8341400 v_interp_p2_f32 v13, [v13], v1, 0, 5, [m0] ; C8351401 v_interp_p1_f32 v16, v0, 1, 5, [m0] ; C8401500 v_interp_p2_f32 v16, [v16], v1, 1, 5, [m0] ; C8411501 v_interp_p1_f32 v22, v0, 2, 5, [m0] ; C8581600 v_interp_p2_f32 v22, [v22], v1, 2, 5, [m0] ; C8591601 v_interp_p1_f32 v8, v0, 0, 6, [m0] ; C8201800 v_interp_p2_f32 v8, [v8], v1, 0, 6, [m0] ; C8211801 v_interp_p1_f32 v7, v0, 1, 6, [m0] ; C81C1900 v_interp_p2_f32 v7, [v7], v1, 1, 6, [m0] ; C81D1901 v_interp_p1_f32 v6, v0, 2, 6, [m0] ; C8181A00 v_interp_p2_f32 v6, [v6], v1, 2, 6, [m0] ; C8191A01 s_load_dwordx4 s[12:15], s[4:5], 0x18 ; C0860518 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx8 s[16:23], s[6:7], 0x30 ; C0C80730 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[8:11] ; F0800700 00461703 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, 2.0, v23, -1.0 ; D2820013 03CE2EF4 v_mad_f32 v28, 2.0, v24, -1.0 ; D282001C 03CE30F4 v_mad_f32 v29, 2.0, v25, -1.0 ; D282001D 03CE32F4 image_sample v20, 1, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800100 00641414 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v20, s38, v20 ; 06282826 v_mul_f32_e32 v20, s33, v20 ; 10282821 v_add_f32_e64 v20, 0, v20 clamp ; D2060814 00022880 v_mul_f32_e32 v21, v13, v13 ; 102A1B0D v_mad_f32 v21, v16, v16, v21 ; D2820015 04562110 v_mad_f32 v21, v22, v22, v21 ; D2820015 04562D16 v_rsq_clamp_f32_e64 v23, |v21| ; D3580117 00000115 v_mov_b32_e32 v24, 0xb58637bd ; 7E3002FF B58637BD v_add_f32_e32 v24, v20, v24 ; 06303114 v_cmp_gt_f32_e32 vcc, 0, v24 ; 7C083080 v_min_f32_e32 v23, 0x7f7fffff, v23 ; 1E2E2EFF 7F7FFFFF v_mul_f32_e32 v13, v13, v23 ; 101A2F0D v_mul_f32_e32 v16, v16, v23 ; 10202F10 v_mul_f32_e32 v22, v22, v23 ; 102C2F16 s_buffer_load_dword s8, s[0:3], 0x30 ; C2040130 s_buffer_load_dword s9, s[0:3], 0x31 ; C2048131 v_and_b32_e32 v20, 0x7fffffff, v20 ; 362828FF 7FFFFFFF v_log_f32_e32 v20, v20 ; 7E284F14 v_mul_f32_e32 v13, s35, v13 ; 101A1A23 v_mad_f32 v13, -s36, v16, -v13 ; D282000D A4362024 v_mad_f32 v16, -s37, v22, v13 ; D2820010 24362C25 v_mul_legacy_f32_e32 v13, s34, v20 ; 0E1A2822 v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_cndmask_b32_e64 v13, v13, 0, vcc ; D200000D 01A9010D s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v16, s8, v16 ; 0A202008 v_mul_f32_e32 v16, s9, v16 ; 10202009 s_buffer_load_dword s28, s[0:3], s32 ; C20E0020 v_mul_f32_e32 v20, v12, v12 ; 1028190C v_mad_f32 v20, v14, v14, v20 ; D2820014 04521D0E v_mad_f32 v20, v15, v15, v20 ; D2820014 04521F0F v_rsq_clamp_f32_e32 v30, v20 ; 7E3C5914 v_mul_f32_e32 v20, v9, v9 ; 10281309 v_mad_f32 v20, v10, v10, v20 ; D2820014 0452150A v_mad_f32 v20, v11, v11, v20 ; D2820014 0452170B v_rsq_clamp_f32_e32 v31, v20 ; 7E3E5914 v_mul_f32_e32 v20, v8, v8 ; 10281108 v_mad_f32 v20, v7, v7, v20 ; D2820014 04520F07 v_mad_f32 v20, v6, v6, v20 ; D2820014 04520D06 v_rsq_clamp_f32_e32 v32, v20 ; 7E405914 v_mul_f32_e32 v20, v17, v17 ; 10282311 v_mad_f32 v20, v18, v18, v20 ; D2820014 04522512 v_mad_f32 v20, v2, v2, v20 ; D2820014 04520502 v_rsq_clamp_f32_e32 v33, v20 ; 7E425914 v_mul_f32_e32 v20, v19, v19 ; 10282713 v_mad_f32 v20, v28, v28, v20 ; D2820014 0452391C v_mad_f32 v20, v29, v29, v20 ; D2820014 04523B1D v_rsq_clamp_f32_e32 v34, v20 ; 7E445914 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_sub_f32_e32 v21, 1.0, v21 ; 082A2AF2 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126 s_buffer_load_dword s12, s[0:3], 0x27 ; C2060127 s_buffer_load_dword s21, s[0:3], 0x0 ; C20A8100 s_buffer_load_dword s20, s[0:3], 0x1 ; C20A0101 s_buffer_load_dword s19, s[0:3], 0x2 ; C2098102 s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s29, s[0:3], 0x13 ; C20E8113 s_buffer_load_dword s13, s[0:3], 0x14 ; C2068114 s_buffer_load_dword s14, s[0:3], 0x15 ; C2070115 s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116 s_buffer_load_dword s30, s[0:3], 0x17 ; C20F0117 s_buffer_load_dword s25, s[0:3], 0x18 ; C20C8118 s_buffer_load_dword s26, s[0:3], 0x19 ; C20D0119 s_buffer_load_dword s27, s[0:3], 0x1a ; C20D811A s_buffer_load_dword s31, s[0:3], 0x1b ; C20F811B s_buffer_load_dword s22, s[0:3], 0x1c ; C20B011C s_buffer_load_dword s23, s[0:3], 0x1d ; C20B811D s_buffer_load_dword s24, s[0:3], 0x1e ; C20C011E s_buffer_load_dword s32, s[0:3], 0x1f ; C210011F s_buffer_load_dword s11, s[0:3], 0x20 ; C2058120 v_mul_f32_e32 v22, v21, v13 ; 102C1B15 v_mul_f32_e32 v20, v16, v16 ; 10282110 v_mul_f32_e32 v16, v22, v20 ; 10202916 v_cmp_gt_f32_e32 vcc, 0, v16 ; 7C082080 v_cndmask_b32_e64 v22, 0, -1.0, vcc ; D2000016 01A9E680 v_cndmask_b32_e64 v22, v22, -1.0, vcc ; D2000016 01A9E716 v_cndmask_b32_e64 v22, v22, -1.0, vcc ; D2000016 01A9E716 v_cndmask_b32_e64 v22, v22, -1.0, vcc ; D2000016 01A9E716 v_cmpx_le_f32_e32 vcc, 0, v22 ; 7C262C80 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[34:35], 0, s28 ; D10A0022 00003880 v_mov_b32_e32 v26, s29 ; 7E34021D v_mov_b32_e32 v24, s30 ; 7E30021E v_mov_b32_e32 v27, s31 ; 7E36021F v_mov_b32_e32 v25, s32 ; 7E320220 v_mov_b32_e32 v22, v13 ; 7E2C030D v_mov_b32_e32 v23, v13 ; 7E2E030D s_and_saveexec_b64 s[28:29], s[34:35] ; BE9C2422 s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx4 s[40:43], s[4:5], 0x0 ; C0940500 v_interp_p1_f32 v22, v0, 3, 7, [m0] ; C8581F00 s_buffer_load_dword s30, s[0:3], 0x6 ; C20F0106 s_buffer_load_dword s31, s[0:3], 0x7 ; C20F8107 s_buffer_load_dword s44, s[0:3], 0x4 ; C2160104 s_buffer_load_dword s45, s[0:3], 0x5 ; C2168105 v_interp_p2_f32 v22, [v22], v1, 3, 7, [m0] ; C8591F01 v_interp_p1_f32 v23, v0, 1, 7, [m0] ; C85C1D00 v_rcp_f32_e32 v22, v22 ; 7E2C5516 v_interp_p2_f32 v23, [v23], v1, 1, 7, [m0] ; C85D1D01 v_interp_p1_f32 v0, v0, 0, 7, [m0] ; C8001C00 v_interp_p2_f32 v0, [v0], v1, 0, 7, [m0] ; C8011C01 v_mul_f32_e32 v0, v0, v22 ; 10002D00 v_mul_f32_e32 v1, v23, v22 ; 10022D17 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v22, s31 ; 7E2C021F v_mad_f32 v22, s44, v0, v22 ; D2820016 045A002C v_mov_b32_e32 v0, s30 ; 7E00021E v_mad_f32 v23, s45, v1, v0 ; D2820017 0402022D image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[32:39], s[40:43] ; F0800700 01482316 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v22, v35, v13 ; 102C1B23 v_mul_f32_e32 v23, v36, v13 ; 102E1B24 v_mul_f32_e32 v13, v37, v13 ; 101A1B25 s_or_b64 exec, exec, s[28:29] ; 88FE1C7E v_min_f32_e32 v0, 0x7f7fffff, v30 ; 1E003CFF 7F7FFFFF v_mul_f32_e32 v1, v0, v12 ; 10021900 v_mul_f32_e32 v12, v0, v14 ; 10181D00 v_mul_f32_e32 v0, v0, v15 ; 10001F00 v_min_f32_e32 v14, 0x7f7fffff, v31 ; 1E1C3EFF 7F7FFFFF v_mul_f32_e32 v9, v14, v9 ; 1012130E v_mul_f32_e32 v10, v14, v10 ; 1014150E v_mul_f32_e32 v11, v14, v11 ; 1016170E v_min_f32_e32 v14, 0x7f7fffff, v32 ; 1E1C40FF 7F7FFFFF v_min_f32_e32 v15, 0x7f7fffff, v33 ; 1E1E42FF 7F7FFFFF v_mul_f32_e32 v30, v15, v17 ; 103C230F v_mul_f32_e32 v31, v15, v18 ; 103E250F v_min_f32_e32 v17, 0x7f7fffff, v34 ; 1E2244FF 7F7FFFFF v_mul_f32_e32 v32, v17, v19 ; 10402711 v_mul_f32_e32 v28, v17, v28 ; 10383911 v_mul_f32_e32 v29, v17, v29 ; 103A3B11 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 v_mul_f32_e32 v17, v14, v8 ; 1022110E v_mul_f32_e32 v17, v17, v32 ; 10224111 v_mul_f32_e32 v18, v14, v7 ; 10240F0E v_mad_f32 v17, v28, v18, v17 ; D2820011 0446251C v_mul_f32_e32 v18, v14, v6 ; 10240D0E v_mad_f32 v17, v29, v18, v17 ; D2820011 0446251D v_mul_f32_e32 v18, v32, v17 ; 10242320 v_mad_f32 v18, v17, v32, v18 ; D2820012 044A4111 v_mad_f32 v8, -v8, v14, v18 ; D2820008 244A1D08 v_mul_f32_e32 v18, v28, v17 ; 1024231C v_mad_f32 v18, v17, v28, v18 ; D2820012 044A3911 v_mad_f32 v7, -v7, v14, v18 ; D2820007 244A1D07 v_mul_f32_e32 v18, v29, v17 ; 1024231D v_mad_f32 v17, v17, v29, v18 ; D2820011 044A3B11 v_mad_f32 v6, -v6, v14, v17 ; D2820006 24461D06 v_mul_f32_e32 v14, v8, v9 ; 101C1308 v_mad_f32 v14, v10, v7, v14 ; D282000E 043A0F0A v_mad_f32 v17, v11, v6, v14 ; D2820011 043A0D0B v_mul_f32_e32 v14, v10, v0 ; 101C010A v_mad_f32 v14, v12, v11, -v14 ; D282000E 843A170C v_mul_f32_e32 v11, v11, v1 ; 1016030B v_mad_f32 v11, v0, v9, -v11 ; D282000B 842E1300 v_mul_f32_e32 v9, v9, v12 ; 10121909 v_mad_f32 v9, v1, v10, -v9 ; D2820009 84261501 v_mul_f32_e32 v10, v5, v14 ; 10141D05 v_mul_f32_e32 v11, v5, v11 ; 10161705 v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mul_f32_e32 v9, v8, v10 ; 10121508 v_mad_f32 v9, v11, v7, v9 ; D2820009 04260F0B v_mad_f32 v18, v5, v6, v9 ; D2820012 04260D05 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mad_f32 v1, v12, v7, v1 ; D2820001 04060F0C v_mad_f32 v19, v0, v6, v1 ; D2820013 04060D00 s_load_dwordx4 s[40:43], s[4:5], 0xc ; C094050C s_load_dwordx8 s[44:51], s[6:7], 0x18 ; C0D60718 v_cubeid_f32 v36, v17, v18, v19 ; D2880024 044E2511 v_cubema_f32 v35, v17, v18, v19 ; D28E0023 044E2511 v_cubesc_f32 v34, v17, v18, v19 ; D28A0022 044E2511 v_cubetc_f32 v33, v17, v18, v19 ; D28C0021 044E2511 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 v_rcp_f32_e64 v0, |v35| ; D3540100 00000123 s_load_dwordx4 s[60:63], s[4:5], 0x10 ; C09E0510 s_load_dwordx4 s[28:31], s[4:5], 0x14 ; C08E0514 s_load_dwordx8 s[64:71], s[6:7], 0x20 ; C0E00720 s_load_dwordx8 s[32:39], s[6:7], 0x28 ; C0D00728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 4, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[44:51], s[40:43] ; F0800400 014B0103 v_mov_b32_e32 v5, 0x3fc00000 ; 7E0A02FF 3FC00000 v_mad_f32 v35, v33, v0, v5 ; D2820023 04160121 v_mad_f32 v34, v34, v0, v5 ; D2820022 04160122 image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[52:59], s[0:3] ; F0800700 000D0922 v_mul_f32_e32 v0, s25, v27 ; 10003619 v_mul_f32_e32 v5, s26, v27 ; 100A361A v_mul_f32_e32 v12, s27, v27 ; 1018361B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mul_f32_e32 v5, v5, v10 ; 100A1505 v_mul_f32_e32 v9, v12, v11 ; 1012170C image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[64:71], s[60:63] ; F0800700 01F00A03 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v1, v0, v10 ; D2820000 042A0101 v_mad_f32 v5, v1, v5, v11 ; D2820005 042E0B01 v_mad_f32 v1, v1, v9, v12 ; D2820001 04321301 v_mul_f32_e32 v9, v30, v32 ; 1012411E v_mad_f32 v9, v28, v31, v9 ; D2820009 04263F1C v_mul_f32_e32 v8, v30, v8 ; 1010111E v_mad_f32 v7, v7, v31, v8 ; D2820007 04223F07 image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[32:39], s[28:31] ; F0800100 00E80303 v_sub_f32_e64 v4, 1.0, s21 ; D2080004 00002AF2 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_sub_f32_e64 v4, 1.0, s20 ; D2080004 000028F2 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_sub_f32_e64 v5, 1.0, s19 ; D2080005 000026F2 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mad_f32 v0, v26, v0, s16 ; D2820000 0042011A v_mad_f32 v4, v4, v26, s17 ; D2820004 00463504 v_mad_f32 v1, v1, v26, s18 ; D2820001 004A3501 v_mul_f32_e32 v2, v15, v2 ; 1004050F v_max_f32_e32 v5, 0, v21 ; 200A2A80 v_mov_b32_e32 v8, 0xb58637bd ; 7E1002FF B58637BD v_add_f32_e32 v10, v8, v5 ; 06140B08 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_mul_f32_e32 v10, s22, v25 ; 10143216 v_mul_f32_e32 v11, s23, v25 ; 10163217 v_mul_f32_e32 v12, s24, v25 ; 10183218 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v3, v10 ; 10141503 v_mul_f32_e32 v11, v3, v11 ; 10161703 v_mul_f32_e32 v3, v3, v12 ; 10061903 v_mov_b32_e32 v12, 0x7fffffff ; 7E1802FF 7FFFFFFF v_and_b32_e32 v5, v5, v12 ; 360A1905 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_mad_f32 v10, v24, v10, s13 ; D282000A 00361518 v_mad_f32 v11, v11, v24, s14 ; D282000B 003A310B v_mad_f32 v3, v3, v24, s15 ; D2820003 003E3103 v_mul_legacy_f32_e32 v5, s12, v5 ; 0E0A0A0C v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cndmask_b32_e64 v5, v5, 0, vcc ; D2000005 01A90105 v_mad_f32 v9, v29, v2, v9 ; D2820009 0426051D v_mad_f32 v2, v6, v2, v7 ; D2820002 041E0506 v_add_f32_e64 v6, 0, v9 clamp ; D2060806 00021280 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_add_f32_e32 v6, v8, v6 ; 060C0D08 v_and_b32_e32 v7, v2, v12 ; 360E1902 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_cndmask_b32_e64 v4, v4, 0, vcc ; D2000004 01A90104 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_add_f32_e32 v2, v8, v2 ; 06040508 v_mul_legacy_f32_e32 v6, s11, v7 ; 0E0C0E0B v_mov_b32_e32 v7, 0x41000000 ; 7E0E02FF 41000000 v_add_f32_e32 v7, s11, v7 ; 060E0E0B v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_mul_f32_e32 v6, 0x3d22f983, v6 ; 100C0CFF 3D22F983 v_mul_f32_e32 v7, v6, v10 ; 100E1506 v_mul_f32_e32 v8, v6, v11 ; 10101706 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, v7, 0, vcc ; D2000002 01A90107 v_cndmask_b32_e64 v6, v8, 0, vcc ; D2000006 01A90108 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_add_f32_e32 v0, v2, v0 ; 06000102 v_add_f32_e32 v2, v6, v4 ; 06040906 v_add_f32_e32 v1, v3, v1 ; 06020303 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mul_f32_e32 v0, v0, v22 ; 10002D00 v_mul_f32_e32 v2, v2, v23 ; 10042F02 v_mul_f32_e32 v1, v1, v13 ; 10021B01 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_mul_f32_e32 v2, s9, v2 ; 10040409 v_mul_f32_e32 v1, s10, v1 ; 1002020A v_mul_f32_e32 v0, v0, v20 ; 10002900 v_mul_f32_e32 v2, v2, v20 ; 10042902 v_mul_f32_e32 v1, v1, v20 ; 10022901 v_xor_b32_e32 v3, 0x80000000, v16 ; 3A0620FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v2, 0, v2, vcc ; D2000002 01AA0480 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 40 Code Size: 1828 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[5], PERSPECTIVE DCL IN[4], TEXCOORD[6], PERSPECTIVE DCL IN[5], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..6] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[4], IN[4] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[4], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[2], IN[2] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[2], TEMP[0].xxxx 8: TEX TEMP[3], IN[1], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: TEX TEMP[3], IN[0], SAMP[4], 2D 18: ADD TEMP[1].w, TEMP[3].xxxx, CONST[8].xxxx 19: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[8].yyyy 20: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 21: POW TEMP[3].x, |TEMP[1].wwww|, CONST[8].zzzz 22: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].xxxx 23: DP3 TEMP[1].w, IN[3], IN[3] 24: RSQ TEMP[0], |TEMP[1].wwww| 25: MIN TEMP[2].w, IMM[1].wwww, TEMP[0] 26: MUL TEMP[5].xyz, TEMP[2].wwww, IN[3] 27: DP3 TEMP[2].w, TEMP[5], -CONST[9] 28: ADD TEMP[2].w, TEMP[2].wwww, -CONST[10].xxxx 29: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[10].yyyy 30: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 31: ADD TEMP[1].w, -TEMP[1].wwww, -IMM[0].yyyy 32: MUL TEMP[3].w, TEMP[3].zzzz, TEMP[1].wwww 33: MUL TEMP[5], TEMP[2].wwww, TEMP[3].wwww 34: MOV TEMP[6], TEMP[5] 35: KILL_IF TEMP[6] 36: UIF CONST[240].xxxx :42 37: RCP TEMP[3].w, IN[5].wwww 38: MUL TEMP[5].xy, TEMP[3].wwww, IN[5] 39: MAD TEMP[5].xy, TEMP[5], CONST[1], CONST[1].wzzw 40: TEX TEMP[6], TEMP[5], SAMP[0], 2D 41: MUL TEMP[3].xyz, TEMP[3].zzzz, TEMP[6] 42: ENDIF 43: MOV TEMP[5].y, IMM[0].yyyy 44: ADD TEMP[5].xyz, -TEMP[5].yyyy, -CONST[0] 45: TEX TEMP[6], IN[1], SAMP[2], 2D 46: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 47: MAD TEMP[5].xyz, TEMP[5], CONST[4].wwww, CONST[4] 48: MAX TEMP[3].w, TEMP[1].wwww, IMM[0].wwww 49: ADD TEMP[1].w, TEMP[3].wwww, IMM[0].zzzz 50: POW TEMP[4].w, |TEMP[3].wwww|, CONST[7].wwww 51: TEX TEMP[6], IN[1], SAMP[3], 2D 52: MAD TEMP[6].xyz, TEMP[6].xxxx, CONST[5].wwww, CONST[5] 53: DP3_SAT TEMP[3].w, TEMP[4], TEMP[2] 54: ADD TEMP[4].x, TEMP[3].wwww, IMM[0].zzzz 55: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 56: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].zzzz 57: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[6].xxxx 58: MOV TEMP[1].x, CONST[6].xxxx 59: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 60: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 61: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 62: MUL TEMP[2].xyz, TEMP[3].wwww, TEMP[5] 63: CMP TEMP[2].xyz, TEMP[4].xxxx, IMM[0].wwww, TEMP[2] 64: MUL TEMP[4].xyz, TEMP[6], TEMP[1].xxxx 65: CMP TEMP[1], TEMP[1].yyyw, IMM[0].wwww, TEMP[4] 66: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 67: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 68: MUL TEMP[1].xyz, TEMP[3], TEMP[1] 69: MUL TEMP[1].xyz, TEMP[1], CONST[7] 70: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 71: CMP OUT[0].xyz, -TEMP[5].wwww, TEMP[1], IMM[0].wwww 72: MOV OUT[0].w, IMM[0].wwww 73: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %49 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %72 = bitcast <8 x i32> addrspace(2)* %71 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %75 = bitcast <4 x i32> addrspace(2)* %74 to <16 x i8> addrspace(2)* %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %90 = fmul float %87, %87 %91 = fmul float %88, %88 %92 = fadd float %91, %90 %93 = fmul float %89, %89 %94 = fadd float %92, %93 %95 = call float @llvm.AMDGPU.rsq.clamped.f32(float %94) %96 = call float @llvm.minnum.f32(float %95, float 0x47EFFFFFE0000000) %97 = fmul float %87, %96 %98 = fmul float %88, %96 %99 = fmul float %89, %96 %100 = fmul float %81, %81 %101 = fmul float %82, %82 %102 = fadd float %101, %100 %103 = fmul float %83, %83 %104 = fadd float %102, %103 %105 = call float @llvm.AMDGPU.rsq.clamped.f32(float %104) %106 = call float @llvm.minnum.f32(float %105, float 0x47EFFFFFE0000000) %107 = fmul float %81, %106 %108 = fmul float %82, %106 %109 = fmul float %83, %106 %110 = bitcast float %79 to i32 %111 = bitcast float %80 to i32 %112 = insertelement <2 x i32> undef, i32 %110, i32 0 %113 = insertelement <2 x i32> %112, i32 %111, i32 1 %114 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %55, <16 x i8> %58, i32 2) %115 = extractelement <4 x float> %114, i32 0 %116 = extractelement <4 x float> %114, i32 1 %117 = extractelement <4 x float> %114, i32 2 %118 = fmul float %115, 2.000000e+00 %119 = fadd float %118, -1.000000e+00 %120 = fmul float %116, 2.000000e+00 %121 = fadd float %120, -1.000000e+00 %122 = fmul float %117, 2.000000e+00 %123 = fadd float %122, -1.000000e+00 %124 = fmul float %119, %119 %125 = fmul float %121, %121 %126 = fadd float %125, %124 %127 = fmul float %123, %123 %128 = fadd float %126, %127 %129 = call float @llvm.AMDGPU.rsq.clamped.f32(float %128) %130 = call float @llvm.minnum.f32(float %129, float 0x47EFFFFFE0000000) %131 = fmul float %119, %130 %132 = fmul float %121, %130 %133 = fmul float %123, %130 %134 = fmul float %131, %97 %135 = fmul float %132, %98 %136 = fadd float %135, %134 %137 = fmul float %133, %99 %138 = fadd float %136, %137 %139 = fmul float %138, %131 %140 = fmul float %138, %132 %141 = fmul float %138, %133 %142 = fmul float %139, 2.000000e+00 %143 = fsub float %142, %97 %144 = fmul float %140, 2.000000e+00 %145 = fsub float %144, %98 %146 = fmul float %141, 2.000000e+00 %147 = fsub float %146, %99 %148 = bitcast float %77 to i32 %149 = bitcast float %78 to i32 %150 = insertelement <2 x i32> undef, i32 %148, i32 0 %151 = insertelement <2 x i32> %150, i32 %149, i32 1 %152 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %151, <32 x i8> %73, <16 x i8> %76, i32 2) %153 = extractelement <4 x float> %152, i32 0 %154 = fadd float %153, %40 %155 = fmul float %154, %41 %156 = call float @llvm.AMDIL.clamp.(float %155, float 0.000000e+00, float 1.000000e+00) %157 = fadd float %156, 0xBEB0C6F7A0000000 %158 = call float @fabs(float %156) %159 = call float @llvm.pow.f32(float %158, float %42) %160 = call float @llvm.AMDGPU.cndlt(float %157, float 0.000000e+00, float %159) %161 = call float @llvm.AMDGPU.cndlt(float %157, float 0.000000e+00, float %159) %162 = call float @llvm.AMDGPU.cndlt(float %157, float 0.000000e+00, float %159) %163 = fmul float %84, %84 %164 = fmul float %85, %85 %165 = fadd float %164, %163 %166 = fmul float %86, %86 %167 = fadd float %165, %166 %168 = call float @fabs(float %167) %169 = call float @llvm.AMDGPU.rsq.clamped.f32(float %168) %170 = call float @llvm.minnum.f32(float %169, float 0x47EFFFFFE0000000) %171 = fmul float %170, %84 %172 = fmul float %170, %85 %173 = fmul float %170, %86 %174 = fmul float %43, %171 %175 = fsub float -0.000000e+00, %174 %176 = fmul float %44, %172 %177 = fsub float %175, %176 %178 = fmul float %45, %173 %179 = fsub float %177, %178 %180 = fsub float %179, %46 %181 = fmul float %180, %47 %182 = call float @llvm.AMDIL.clamp.(float %181, float 0.000000e+00, float 1.000000e+00) %183 = fmul float %182, %182 %184 = fsub float 1.000000e+00, %167 %185 = fmul float %162, %184 %186 = fmul float %183, %185 %187 = fmul float %183, %185 %188 = fmul float %183, %185 %189 = fmul float %183, %185 %190 = fcmp olt float %186, 0.000000e+00 %191 = fcmp olt float %187, 0.000000e+00 %192 = fcmp olt float %188, 0.000000e+00 %193 = fcmp olt float %189, 0.000000e+00 %194 = or i1 %193, %192 %195 = or i1 %194, %191 %196 = or i1 %195, %190 %197 = select i1 %196, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %197) %198 = bitcast float %48 to i32 %199 = icmp eq i32 %198, 0 br i1 %199, label %ENDIF, label %IF IF: ; preds = %main_body %200 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %201 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %202 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %203 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %204 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %205 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %206 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %207 = fdiv float 1.000000e+00, %200 %208 = fmul float %207, %202 %209 = fmul float %207, %201 %210 = fmul float %208, %206 %211 = fadd float %210, %203 %212 = fmul float %209, %205 %213 = fadd float %212, %204 %214 = bitcast float %211 to i32 %215 = bitcast float %213 to i32 %216 = insertelement <2 x i32> undef, i32 %214, i32 0 %217 = insertelement <2 x i32> %216, i32 %215, i32 1 %218 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %217, <32 x i8> %50, <16 x i8> %52, i32 2) %219 = extractelement <4 x float> %218, i32 0 %220 = extractelement <4 x float> %218, i32 1 %221 = extractelement <4 x float> %218, i32 2 %222 = fmul float %162, %219 %223 = fmul float %162, %220 %224 = fmul float %162, %221 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp12.0 = phi float [ %222, %IF ], [ %160, %main_body ] %temp13.0 = phi float [ %223, %IF ], [ %161, %main_body ] %temp14.0 = phi float [ %224, %IF ], [ %162, %main_body ] %225 = fsub float 1.000000e+00, %24 %226 = fsub float 1.000000e+00, %25 %227 = fsub float 1.000000e+00, %26 %228 = bitcast float %79 to i32 %229 = bitcast float %80 to i32 %230 = insertelement <2 x i32> undef, i32 %228, i32 0 %231 = insertelement <2 x i32> %230, i32 %229, i32 1 %232 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %231, <32 x i8> %61, <16 x i8> %64, i32 2) %233 = extractelement <4 x float> %232, i32 0 %234 = extractelement <4 x float> %232, i32 1 %235 = extractelement <4 x float> %232, i32 2 %236 = fmul float %225, %233 %237 = fmul float %226, %234 %238 = fmul float %227, %235 %239 = fmul float %236, %30 %240 = fadd float %239, %27 %241 = fmul float %237, %30 %242 = fadd float %241, %28 %243 = fmul float %238, %30 %244 = fadd float %243, %29 %245 = call float @llvm.maxnum.f32(float %184, float 0.000000e+00) %246 = fadd float %245, 0xBEB0C6F7A0000000 %247 = call float @fabs(float %245) %248 = call float @llvm.pow.f32(float %247, float %39) %249 = bitcast float %79 to i32 %250 = bitcast float %80 to i32 %251 = insertelement <2 x i32> undef, i32 %249, i32 0 %252 = insertelement <2 x i32> %251, i32 %250, i32 1 %253 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %252, <32 x i8> %67, <16 x i8> %70, i32 2) %254 = extractelement <4 x float> %253, i32 0 %255 = fmul float %254, %34 %256 = fadd float %255, %31 %257 = fmul float %254, %34 %258 = fadd float %257, %32 %259 = fmul float %254, %34 %260 = fadd float %259, %33 %261 = fmul float %131, %107 %262 = fmul float %132, %108 %263 = fadd float %262, %261 %264 = fmul float %133, %109 %265 = fadd float %263, %264 %266 = call float @llvm.AMDIL.clamp.(float %265, float 0.000000e+00, float 1.000000e+00) %267 = fadd float %266, 0xBEB0C6F7A0000000 %268 = fmul float %143, %107 %269 = fmul float %145, %108 %270 = fadd float %269, %268 %271 = fmul float %147, %109 %272 = fadd float %270, %271 %273 = call float @llvm.AMDIL.clamp.(float %272, float 0.000000e+00, float 1.000000e+00) %274 = fadd float %273, 0xBEB0C6F7A0000000 %275 = call float @fabs(float %273) %276 = call float @llvm.pow.f32(float %275, float %35) %277 = fadd float %35, 8.000000e+00 %278 = fmul float %277, %276 %279 = fmul float %278, 0x3FA45F3060000000 %280 = fmul float %266, %240 %281 = fmul float %266, %242 %282 = fmul float %266, %244 %283 = call float @llvm.AMDGPU.cndlt(float %267, float 0.000000e+00, float %280) %284 = call float @llvm.AMDGPU.cndlt(float %267, float 0.000000e+00, float %281) %285 = call float @llvm.AMDGPU.cndlt(float %267, float 0.000000e+00, float %282) %286 = fmul float %256, %279 %287 = fmul float %258, %279 %288 = fmul float %260, %279 %289 = call float @llvm.AMDGPU.cndlt(float %274, float 0.000000e+00, float %286) %290 = call float @llvm.AMDGPU.cndlt(float %274, float 0.000000e+00, float %287) %291 = call float @llvm.AMDGPU.cndlt(float %274, float 0.000000e+00, float %288) %292 = call float @llvm.AMDGPU.cndlt(float %246, float 0.000000e+00, float %248) %293 = fadd float %289, %283 %294 = fadd float %290, %284 %295 = fadd float %291, %285 %296 = fmul float %292, %293 %297 = fmul float %292, %294 %298 = fmul float %292, %295 %299 = fmul float %temp12.0, %296 %300 = fmul float %temp13.0, %297 %301 = fmul float %temp14.0, %298 %302 = fmul float %299, %36 %303 = fmul float %300, %37 %304 = fmul float %301, %38 %305 = fmul float %183, %302 %306 = fmul float %183, %303 %307 = fmul float %183, %304 %308 = fsub float -0.000000e+00, %189 %309 = call float @llvm.AMDGPU.cndlt(float %308, float %305, float 0.000000e+00) %310 = fsub float -0.000000e+00, %189 %311 = call float @llvm.AMDGPU.cndlt(float %310, float %306, float 0.000000e+00) %312 = fsub float -0.000000e+00, %189 %313 = call float @llvm.AMDGPU.cndlt(float %312, float %307, float 0.000000e+00) %314 = call i32 @llvm.SI.packf16(float %309, float %311) %315 = bitcast i32 %314 to float %316 = call i32 @llvm.SI.packf16(float %313, float 0.000000e+00) %317 = bitcast i32 %316 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %315, float %317, float %315, float %317) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_movk_i32 s0, 0xf00 ; B0000F00 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v10, 0xb58637bd ; 7E1402FF B58637BD v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100 v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s44, s[12:15], s0 ; C2160C00 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 s_buffer_load_dword s16, s[12:15], 0x0 ; C2080D00 s_buffer_load_dword s17, s[12:15], 0x1 ; C2088D01 s_buffer_load_dword s11, s[12:15], 0x2 ; C2058D02 s_buffer_load_dword s1, s[12:15], 0x10 ; C2008D10 s_buffer_load_dword s2, s[12:15], 0x11 ; C2010D11 s_buffer_load_dword s3, s[12:15], 0x12 ; C2018D12 s_buffer_load_dword s45, s[12:15], 0x13 ; C2168D13 s_buffer_load_dword s8, s[12:15], 0x14 ; C2040D14 s_buffer_load_dword s9, s[12:15], 0x15 ; C2048D15 s_buffer_load_dword s10, s[12:15], 0x16 ; C2050D16 s_buffer_load_dword s46, s[12:15], 0x17 ; C2170D17 s_buffer_load_dword s0, s[12:15], 0x18 ; C2000D18 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00 v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01 s_buffer_load_dword s18, s[12:15], 0x25 ; C2090D25 v_interp_p1_f32 v14, v0, 1, 3, [m0] ; C8380D00 v_interp_p2_f32 v14, [v14], v1, 1, 3, [m0] ; C8390D01 v_interp_p1_f32 v15, v0, 2, 3, [m0] ; C83C0E00 v_interp_p2_f32 v15, [v15], v1, 2, 3, [m0] ; C83D0E01 v_interp_p1_f32 v6, v0, 0, 4, [m0] ; C8181000 v_interp_p2_f32 v6, [v6], v1, 0, 4, [m0] ; C8191001 v_interp_p1_f32 v5, v0, 1, 4, [m0] ; C8141100 v_interp_p2_f32 v5, [v5], v1, 1, 4, [m0] ; C8151101 v_interp_p1_f32 v4, v0, 2, 4, [m0] ; C8101200 v_interp_p2_f32 v4, [v4], v1, 2, 4, [m0] ; C8111201 s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 s_load_dwordx8 s[28:35], s[6:7], 0x20 ; C0CE0720 s_buffer_load_dword s19, s[12:15], 0x20 ; C2098D20 s_buffer_load_dword s47, s[12:15], 0x21 ; C2178D21 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[20:23] ; F0800700 00A91402 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[28:35], s[24:27] ; F0800100 00C70B0B s_buffer_load_dword s20, s[12:15], 0x22 ; C20A0D22 s_buffer_load_dword s21, s[12:15], 0x24 ; C20A8D24 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v11, s19, v11 ; 06161613 v_mul_f32_e32 v11, s47, v11 ; 1016162F v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_and_b32_e32 v12, 0x7fffffff, v11 ; 361816FF 7FFFFFFF v_log_f32_e32 v12, v12 ; 7E184F0C v_add_f32_e32 v10, v11, v10 ; 0614150B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v11, s20, v12 ; 0E161814 v_exp_f32_e32 v11, v11 ; 7E164B0B v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v10, v11, 0, vcc ; D200000A 01A9010B v_mul_f32_e32 v11, v13, v13 ; 10161B0D v_mad_f32 v11, v14, v14, v11 ; D282000B 042E1D0E v_mad_f32 v12, v15, v15, v11 ; D282000C 042E1F0F s_buffer_load_dword s19, s[12:15], 0x26 ; C2098D26 s_buffer_load_dword s20, s[12:15], 0x28 ; C20A0D28 s_buffer_load_dword s22, s[12:15], 0x29 ; C20B0D29 v_rsq_clamp_f32_e64 v11, |v12| ; D358010B 0000010C v_mad_f32 v18, 2.0, v20, -1.0 ; D2820012 03CE28F4 v_mad_f32 v19, 2.0, v21, -1.0 ; D2820013 03CE2AF4 v_mad_f32 v20, 2.0, v22, -1.0 ; D2820014 03CE2CF4 v_min_f32_e32 v11, 0x7f7fffff, v11 ; 1E1616FF 7F7FFFFF v_mul_f32_e32 v13, v13, v11 ; 101A170D v_mul_f32_e32 v13, s21, v13 ; 101A1A15 v_mul_f32_e32 v14, v14, v11 ; 101C170E v_mad_f32 v13, -s18, v14, -v13 ; D282000D A4361C12 v_mul_f32_e32 v11, v15, v11 ; 1016170F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, -s19, v11, v13 ; D282000B 24361613 v_subrev_f32_e32 v11, s20, v11 ; 0A161614 v_mul_f32_e32 v11, s22, v11 ; 10161616 v_mul_f32_e32 v13, v6, v6 ; 101A0D06 v_mad_f32 v13, v5, v5, v13 ; D282000D 04360B05 v_mad_f32 v13, v4, v4, v13 ; D282000D 04360904 v_rsq_clamp_f32_e32 v21, v13 ; 7E2A590D v_mul_f32_e32 v13, v7, v7 ; 101A0F07 v_mad_f32 v13, v8, v8, v13 ; D282000D 04361108 v_mad_f32 v13, v9, v9, v13 ; D282000D 04361309 v_rsq_clamp_f32_e32 v22, v13 ; 7E2C590D v_mul_f32_e32 v13, v18, v18 ; 101A2512 v_mad_f32 v13, v19, v19, v13 ; D282000D 04362713 v_mad_f32 v13, v20, v20, v13 ; D282000D 04362914 v_rsq_clamp_f32_e32 v23, v13 ; 7E2E590D v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2 s_buffer_load_dword s21, s[12:15], 0x1f ; C20A8D1F s_buffer_load_dword s20, s[12:15], 0x1c ; C20A0D1C s_buffer_load_dword s19, s[12:15], 0x1d ; C2098D1D s_buffer_load_dword s18, s[12:15], 0x1e ; C2090D1E v_mul_f32_e32 v12, v13, v10 ; 1018150D v_mul_f32_e32 v12, v12, v11 ; 1018170C v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v14, 0, -1.0, vcc ; D200000E 01A9E680 v_cndmask_b32_e64 v14, v14, -1.0, vcc ; D200000E 01A9E70E v_cndmask_b32_e64 v14, v14, -1.0, vcc ; D200000E 01A9E70E v_cndmask_b32_e64 v14, v14, -1.0, vcc ; D200000E 01A9E70E v_cmpx_le_f32_e32 vcc, 0, v14 ; 7C261C80 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v17, s45 ; 7E22022D v_mov_b32_e32 v16, s46 ; 7E20022E v_cmp_ne_i32_e64 s[22:23], 0, s44 ; D10A0016 00005880 v_mov_b32_e32 v14, v10 ; 7E1C030A v_mov_b32_e32 v15, v10 ; 7E1E030A s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 v_interp_p1_f32 v14, v0, 3, 5, [m0] ; C8381700 s_buffer_load_dword s36, s[12:15], 0x6 ; C2120D06 s_buffer_load_dword s37, s[12:15], 0x7 ; C2128D07 s_buffer_load_dword s38, s[12:15], 0x4 ; C2130D04 s_buffer_load_dword s39, s[12:15], 0x5 ; C2138D05 v_interp_p2_f32 v14, [v14], v1, 3, 5, [m0] ; C8391701 v_interp_p1_f32 v15, v0, 1, 5, [m0] ; C83C1500 v_rcp_f32_e32 v14, v14 ; 7E1C550E v_interp_p2_f32 v15, [v15], v1, 1, 5, [m0] ; C83D1501 v_interp_p1_f32 v0, v0, 0, 5, [m0] ; C8001400 v_interp_p2_f32 v0, [v0], v1, 0, 5, [m0] ; C8011401 v_mul_f32_e32 v0, v0, v14 ; 10001D00 v_mul_f32_e32 v1, v15, v14 ; 10021D0F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v14, s37 ; 7E1C0225 v_mad_f32 v14, s38, v0, v14 ; D282000E 043A0026 v_mov_b32_e32 v0, s36 ; 7E000224 v_mad_f32 v15, s39, v1, v0 ; D282000F 04020227 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[24:31], s[32:35] ; F0800700 0106180E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v14, v24, v10 ; 101C1518 v_mul_f32_e32 v15, v25, v10 ; 101E1519 v_mul_f32_e32 v10, v26, v10 ; 1014151A s_or_b64 exec, exec, s[22:23] ; 88FE167E v_min_f32_e32 v0, 0x7f7fffff, v21 ; 1E002AFF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v22 ; 1E022CFF 7F7FFFFF v_mul_f32_e32 v7, v1, v7 ; 100E0F01 v_mul_f32_e32 v8, v1, v8 ; 10101101 v_mul_f32_e32 v1, v1, v9 ; 10021301 v_min_f32_e32 v9, 0x7f7fffff, v23 ; 1E122EFF 7F7FFFFF v_mul_f32_e32 v18, v9, v18 ; 10242509 v_mul_f32_e32 v19, v9, v19 ; 10262709 v_mul_f32_e32 v9, v9, v20 ; 10122909 v_sub_f32_e64 v20, 1.0, s16 ; D2080014 000020F2 v_sub_f32_e64 v21, 1.0, s17 ; D2080015 000022F2 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 v_sub_f32_e64 v22, 1.0, s11 ; D2080016 000016F2 v_mul_f32_e32 v23, v0, v6 ; 102E0D00 v_mul_f32_e32 v23, v23, v18 ; 102E2517 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[12:15] ; F0800700 00671802 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v20, v24, v20 ; 10282918 v_mul_f32_e32 v21, v25, v21 ; 102A2B19 v_mul_f32_e32 v22, v26, v22 ; 102C2D1A v_mul_f32_e32 v24, v0, v5 ; 10300B00 v_mad_f32 v23, v19, v24, v23 ; D2820017 045E3113 v_mul_f32_e32 v24, v0, v4 ; 10300900 v_mad_f32 v23, v9, v24, v23 ; D2820017 045E3109 v_mul_f32_e32 v24, v18, v23 ; 10302F12 v_mad_f32 v24, v23, v18, v24 ; D2820018 04622517 v_mad_f32 v6, -v6, v0, v24 ; D2820006 24620106 v_mul_f32_e32 v24, v19, v23 ; 10302F13 v_mad_f32 v24, v23, v19, v24 ; D2820018 04622717 v_mad_f32 v5, -v5, v0, v24 ; D2820005 24620105 v_mul_f32_e32 v24, v9, v23 ; 10302F09 v_mad_f32 v23, v23, v9, v24 ; D2820017 04621317 v_mad_f32 v0, -v4, v0, v23 ; D2820000 245E0104 image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[24:27] ; F0800100 00C90202 v_mad_f32 v3, v17, v20, s1 ; D2820003 00062911 v_mad_f32 v4, v21, v17, s2 ; D2820004 000A2315 v_mad_f32 v17, v22, v17, s3 ; D2820011 000E2316 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v16, v2, s8 ; D2820014 00220510 v_mad_f32 v21, v2, v16, s9 ; D2820015 00262102 v_mad_f32 v2, v2, v16, s10 ; D2820002 002A2102 v_mul_f32_e32 v16, v7, v18 ; 10202507 v_mad_f32 v16, v19, v8, v16 ; D2820010 04421113 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mad_f32 v5, v5, v8, v6 ; D2820005 041A1105 v_mad_f32 v6, v9, v1, v16 ; D2820006 04420309 v_mad_f32 v0, v0, v1, v5 ; D2820000 04160300 v_add_f32_e64 v1, 0, v6 clamp ; D2060801 00020C80 v_mul_f32_e32 v3, v3, v1 ; 10060303 v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mul_f32_e32 v5, v17, v1 ; 100A0311 v_mov_b32_e32 v6, 0xb58637bd ; 7E0C02FF B58637BD v_add_f32_e32 v1, v6, v1 ; 06020306 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v3, 0, vcc ; D2000001 01A90103 v_cndmask_b32_e64 v3, v4, 0, vcc ; D2000003 01A90104 v_cndmask_b32_e64 v4, v5, 0, vcc ; D2000004 01A90105 v_max_f32_e32 v5, 0, v13 ; 200A1A80 v_mov_b32_e32 v7, 0x7fffffff ; 7E0E02FF 7FFFFFFF v_and_b32_e32 v8, v5, v7 ; 36100F05 v_log_f32_e32 v8, v8 ; 7E104F08 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v7, v0, v7 ; 360E0F00 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_mul_legacy_f32_e32 v8, s21, v8 ; 0E101015 v_add_f32_e32 v5, v6, v5 ; 060A0B06 v_add_f32_e32 v0, v6, v0 ; 06000106 v_mul_legacy_f32_e32 v6, s0, v7 ; 0E0C0E00 v_mov_b32_e32 v7, 0x41000000 ; 7E0E02FF 41000000 v_add_f32_e32 v7, s0, v7 ; 060E0E00 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_exp_f32_e32 v7, v8 ; 7E0E4B08 v_mul_f32_e32 v6, 0x3d22f983, v6 ; 100C0CFF 3D22F983 v_mul_f32_e32 v8, v6, v20 ; 10102906 v_mul_f32_e32 v9, v6, v21 ; 10122B06 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v8, 0, vcc ; D2000000 01A90108 v_cndmask_b32_e64 v6, v9, 0, vcc ; D2000006 01A90109 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v5, v7, 0, vcc ; D2000005 01A90107 v_add_f32_e32 v0, v1, v0 ; 06000101 v_add_f32_e32 v1, v3, v6 ; 06020D03 v_add_f32_e32 v2, v4, v2 ; 06040504 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v0, v0, v14 ; 10001D00 v_mul_f32_e32 v1, v1, v15 ; 10021F01 v_mul_f32_e32 v2, v2, v10 ; 10041502 v_mul_f32_e32 v0, s20, v0 ; 10000014 v_mul_f32_e32 v1, s19, v1 ; 10020213 v_mul_f32_e32 v2, s18, v2 ; 10040412 v_mul_f32_e32 v0, v0, v11 ; 10001700 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_xor_b32_e32 v3, 0x80000000, v12 ; 3A0618FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cndmask_b32_e64 v1, 0, v2, vcc ; D2000001 01AA0480 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 28 Code Size: 1364 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[5], PERSPECTIVE DCL IN[4], TEXCOORD[6], PERSPECTIVE DCL IN[5], TEXCOORD[7], PERSPECTIVE DCL IN[6], FACE, CONSTANT DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..6] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, -0.0000} IMM[1] FLT32 { 0.0000, -0.3333, 8.0000, 0.0398} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[4], IN[4] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[4], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[2], IN[2] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[2], TEMP[0].xxxx 8: TEX TEMP[3], IN[1], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: CMP TEMP[1].w, IN[6].xxxx, IMM[0].zzzz, IMM[0].yyyy 15: MUL TEMP[1].w, TEMP[1].wwww, CONST[0].xxxx 16: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 17: DP3 TEMP[1].w, TEMP[3], TEMP[1] 18: MUL TEMP[4].xyz, TEMP[1].wwww, TEMP[3] 19: MAD TEMP[1].xyz, TEMP[4], IMM[0].xxxx, -TEMP[1] 20: TEX TEMP[4], IN[0], SAMP[5], 2D 21: ADD TEMP[1].w, TEMP[4].xxxx, CONST[9].xxxx 22: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[9].yyyy 23: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].wwww 24: POW TEMP[3].w, |TEMP[1].wwww|, CONST[9].zzzz 25: CMP TEMP[4].xyz, TEMP[2].wwww, IMM[1].xxxx, TEMP[3].wwww 26: DP3 TEMP[1].w, IN[3], IN[3] 27: RSQ TEMP[0], |TEMP[1].wwww| 28: MIN TEMP[2].w, IMM[2].xxxx, TEMP[0] 29: MUL TEMP[5].xyz, TEMP[2].wwww, IN[3] 30: DP3 TEMP[2].w, TEMP[5], -CONST[10] 31: ADD TEMP[2].w, TEMP[2].wwww, -CONST[11].xxxx 32: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[11].yyyy 33: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 34: ADD TEMP[1].w, -TEMP[1].wwww, IMM[0].zzzz 35: MUL TEMP[3].w, TEMP[4].zzzz, TEMP[1].wwww 36: MUL TEMP[5], TEMP[2].wwww, TEMP[3].wwww 37: MOV TEMP[6], TEMP[5] 38: KILL_IF TEMP[6] 39: TEX TEMP[6], IN[1], SAMP[4], 2D 40: ADD TEMP[3].w, TEMP[6].xxxx, IMM[1].yyyy 41: CMP TEMP[6], -TEMP[5].wwww, TEMP[3].wwww, IMM[1].xxxx 42: KILL_IF TEMP[6] 43: UIF CONST[240].xxxx :49 44: RCP TEMP[3].w, IN[5].wwww 45: MUL TEMP[5].xy, TEMP[3].wwww, IN[5] 46: MAD TEMP[5].xy, TEMP[5], CONST[1], CONST[1].wzzw 47: TEX TEMP[6], TEMP[5], SAMP[0], 2D 48: MUL TEMP[4].xyz, TEMP[4].zzzz, TEMP[6] 49: ENDIF 50: MOV TEMP[5].z, IMM[0].zzzz 51: ADD TEMP[5].xyz, TEMP[5].zzzz, -CONST[6] 52: TEX TEMP[6], IN[1], SAMP[2], 2D 53: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 54: MAD TEMP[5].xyz, TEMP[5], CONST[4].wwww, CONST[4] 55: MAX TEMP[3].w, TEMP[1].wwww, IMM[1].xxxx 56: ADD TEMP[1].w, TEMP[3].wwww, IMM[0].wwww 57: POW TEMP[4].w, |TEMP[3].wwww|, CONST[8].wwww 58: CMP TEMP[1].w, TEMP[1].wwww, IMM[1].xxxx, TEMP[4].wwww 59: TEX TEMP[6], IN[1], SAMP[3], 2D 60: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 61: DP3_SAT TEMP[3].x, TEMP[3], TEMP[2] 62: ADD TEMP[3].y, TEMP[3].xxxx, IMM[0].wwww 63: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 64: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].wwww 65: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[7].xxxx 66: MOV TEMP[1].z, IMM[1].zzzz 67: ADD TEMP[1].x, TEMP[1].zzzz, CONST[7].xxxx 68: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 69: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww 70: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[5] 71: CMP TEMP[2].xyz, TEMP[3].yyyy, IMM[1].xxxx, TEMP[2] 72: MUL TEMP[3].xyz, TEMP[6], TEMP[1].xxxx 73: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[1].xxxx, TEMP[3] 74: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 75: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 76: MUL TEMP[1].xyz, TEMP[4], TEMP[1] 77: MUL TEMP[1].xyz, TEMP[1], CONST[8] 78: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 79: CMP OUT[0].xyz, -TEMP[5].wwww, TEMP[1], IMM[1].xxxx 80: MOV OUT[0].w, IMM[1].xxxx 81: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %50 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %55 = bitcast <8 x i32> addrspace(2)* %54 to <32 x i8> addrspace(2)* %56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %58 = bitcast <4 x i32> addrspace(2)* %57 to <16 x i8> addrspace(2)* %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %61 = bitcast <8 x i32> addrspace(2)* %60 to <32 x i8> addrspace(2)* %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %64 = bitcast <4 x i32> addrspace(2)* %63 to <16 x i8> addrspace(2)* %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 %66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %67 = bitcast <8 x i32> addrspace(2)* %66 to <32 x i8> addrspace(2)* %68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %70 = bitcast <4 x i32> addrspace(2)* %69 to <16 x i8> addrspace(2)* %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %73 = bitcast <8 x i32> addrspace(2)* %72 to <32 x i8> addrspace(2)* %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, align 32, !tbaa !0 %75 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %76 = bitcast <4 x i32> addrspace(2)* %75 to <16 x i8> addrspace(2)* %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %79 = bitcast <8 x i32> addrspace(2)* %78 to <32 x i8> addrspace(2)* %80 = load <32 x i8>, <32 x i8> addrspace(2)* %79, align 32, !tbaa !0 %81 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %82 = bitcast <4 x i32> addrspace(2)* %81 to <16 x i8> addrspace(2)* %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %97 = fmul float %94, %94 %98 = fmul float %95, %95 %99 = fadd float %98, %97 %100 = fmul float %96, %96 %101 = fadd float %99, %100 %102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101) %103 = call float @llvm.minnum.f32(float %102, float 0x47EFFFFFE0000000) %104 = fmul float %94, %103 %105 = fmul float %95, %103 %106 = fmul float %96, %103 %107 = fmul float %88, %88 %108 = fmul float %89, %89 %109 = fadd float %108, %107 %110 = fmul float %90, %90 %111 = fadd float %109, %110 %112 = call float @llvm.AMDGPU.rsq.clamped.f32(float %111) %113 = call float @llvm.minnum.f32(float %112, float 0x47EFFFFFE0000000) %114 = fmul float %88, %113 %115 = fmul float %89, %113 %116 = fmul float %90, %113 %117 = bitcast float %86 to i32 %118 = bitcast float %87 to i32 %119 = insertelement <2 x i32> undef, i32 %117, i32 0 %120 = insertelement <2 x i32> %119, i32 %118, i32 1 %121 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %120, <32 x i8> %56, <16 x i8> %59, i32 2) %122 = extractelement <4 x float> %121, i32 0 %123 = extractelement <4 x float> %121, i32 1 %124 = extractelement <4 x float> %121, i32 2 %125 = fmul float %122, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %123, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %124, 2.000000e+00 %130 = fadd float %129, -1.000000e+00 %131 = fmul float %126, %126 %132 = fmul float %128, %128 %133 = fadd float %132, %131 %134 = fmul float %130, %130 %135 = fadd float %133, %134 %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135) %137 = call float @llvm.minnum.f32(float %136, float 0x47EFFFFFE0000000) %138 = fmul float %126, %137 %139 = fmul float %128, %137 %140 = fmul float %130, %137 %141 = call float @llvm.AMDGPU.cndlt(float %18, float 1.000000e+00, float -1.000000e+00) %142 = fmul float %141, %24 %143 = fmul float %142, %138 %144 = fmul float %142, %139 %145 = fmul float %142, %140 %146 = fmul float %143, %104 %147 = fmul float %144, %105 %148 = fadd float %147, %146 %149 = fmul float %145, %106 %150 = fadd float %148, %149 %151 = fmul float %150, %143 %152 = fmul float %150, %144 %153 = fmul float %150, %145 %154 = fmul float %151, 2.000000e+00 %155 = fsub float %154, %104 %156 = fmul float %152, 2.000000e+00 %157 = fsub float %156, %105 %158 = fmul float %153, 2.000000e+00 %159 = fsub float %158, %106 %160 = bitcast float %84 to i32 %161 = bitcast float %85 to i32 %162 = insertelement <2 x i32> undef, i32 %160, i32 0 %163 = insertelement <2 x i32> %162, i32 %161, i32 1 %164 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %163, <32 x i8> %80, <16 x i8> %83, i32 2) %165 = extractelement <4 x float> %164, i32 0 %166 = fadd float %165, %41 %167 = fmul float %166, %42 %168 = call float @llvm.AMDIL.clamp.(float %167, float 0.000000e+00, float 1.000000e+00) %169 = fadd float %168, 0xBEB0C6F7A0000000 %170 = call float @fabs(float %168) %171 = call float @llvm.pow.f32(float %170, float %43) %172 = call float @llvm.AMDGPU.cndlt(float %169, float 0.000000e+00, float %171) %173 = call float @llvm.AMDGPU.cndlt(float %169, float 0.000000e+00, float %171) %174 = call float @llvm.AMDGPU.cndlt(float %169, float 0.000000e+00, float %171) %175 = fmul float %91, %91 %176 = fmul float %92, %92 %177 = fadd float %176, %175 %178 = fmul float %93, %93 %179 = fadd float %177, %178 %180 = call float @fabs(float %179) %181 = call float @llvm.AMDGPU.rsq.clamped.f32(float %180) %182 = call float @llvm.minnum.f32(float %181, float 0x47EFFFFFE0000000) %183 = fmul float %182, %91 %184 = fmul float %182, %92 %185 = fmul float %182, %93 %186 = fmul float %44, %183 %187 = fsub float -0.000000e+00, %186 %188 = fmul float %45, %184 %189 = fsub float %187, %188 %190 = fmul float %46, %185 %191 = fsub float %189, %190 %192 = fsub float %191, %47 %193 = fmul float %192, %48 %194 = call float @llvm.AMDIL.clamp.(float %193, float 0.000000e+00, float 1.000000e+00) %195 = fmul float %194, %194 %196 = fsub float 1.000000e+00, %179 %197 = fmul float %174, %196 %198 = fmul float %195, %197 %199 = fmul float %195, %197 %200 = fmul float %195, %197 %201 = fmul float %195, %197 %202 = fcmp olt float %198, 0.000000e+00 %203 = fcmp olt float %199, 0.000000e+00 %204 = fcmp olt float %200, 0.000000e+00 %205 = fcmp olt float %201, 0.000000e+00 %206 = or i1 %205, %204 %207 = or i1 %206, %203 %208 = or i1 %207, %202 %209 = select i1 %208, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %209) %210 = bitcast float %86 to i32 %211 = bitcast float %87 to i32 %212 = insertelement <2 x i32> undef, i32 %210, i32 0 %213 = insertelement <2 x i32> %212, i32 %211, i32 1 %214 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %213, <32 x i8> %74, <16 x i8> %77, i32 2) %215 = extractelement <4 x float> %214, i32 0 %216 = fadd float %215, 0xBFD554C980000000 %217 = fsub float -0.000000e+00, %201 %218 = call float @llvm.AMDGPU.cndlt(float %217, float %216, float 0.000000e+00) %219 = fsub float -0.000000e+00, %201 %220 = call float @llvm.AMDGPU.cndlt(float %219, float %216, float 0.000000e+00) %221 = fsub float -0.000000e+00, %201 %222 = call float @llvm.AMDGPU.cndlt(float %221, float %216, float 0.000000e+00) %223 = fsub float -0.000000e+00, %201 %224 = call float @llvm.AMDGPU.cndlt(float %223, float %216, float 0.000000e+00) %225 = fcmp olt float %218, 0.000000e+00 %226 = fcmp olt float %220, 0.000000e+00 %227 = fcmp olt float %222, 0.000000e+00 %228 = fcmp olt float %224, 0.000000e+00 %229 = or i1 %228, %227 %230 = or i1 %229, %226 %231 = or i1 %230, %225 %232 = select i1 %231, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %232) %233 = bitcast float %49 to i32 %234 = icmp eq i32 %233, 0 br i1 %234, label %ENDIF, label %IF IF: ; preds = %main_body %235 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %236 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %237 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %238 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %239 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %240 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %241 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %242 = fdiv float 1.000000e+00, %235 %243 = fmul float %242, %237 %244 = fmul float %242, %236 %245 = fmul float %243, %241 %246 = fadd float %245, %238 %247 = fmul float %244, %240 %248 = fadd float %247, %239 %249 = bitcast float %246 to i32 %250 = bitcast float %248 to i32 %251 = insertelement <2 x i32> undef, i32 %249, i32 0 %252 = insertelement <2 x i32> %251, i32 %250, i32 1 %253 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %252, <32 x i8> %51, <16 x i8> %53, i32 2) %254 = extractelement <4 x float> %253, i32 0 %255 = extractelement <4 x float> %253, i32 1 %256 = extractelement <4 x float> %253, i32 2 %257 = fmul float %174, %254 %258 = fmul float %174, %255 %259 = fmul float %174, %256 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp16.0 = phi float [ %257, %IF ], [ %172, %main_body ] %temp17.0 = phi float [ %258, %IF ], [ %173, %main_body ] %temp18.0 = phi float [ %259, %IF ], [ %174, %main_body ] %260 = fsub float 1.000000e+00, %33 %261 = fsub float 1.000000e+00, %34 %262 = fsub float 1.000000e+00, %35 %263 = bitcast float %86 to i32 %264 = bitcast float %87 to i32 %265 = insertelement <2 x i32> undef, i32 %263, i32 0 %266 = insertelement <2 x i32> %265, i32 %264, i32 1 %267 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %266, <32 x i8> %62, <16 x i8> %65, i32 2) %268 = extractelement <4 x float> %267, i32 0 %269 = extractelement <4 x float> %267, i32 1 %270 = extractelement <4 x float> %267, i32 2 %271 = fmul float %260, %268 %272 = fmul float %261, %269 %273 = fmul float %262, %270 %274 = fmul float %271, %28 %275 = fadd float %274, %25 %276 = fmul float %272, %28 %277 = fadd float %276, %26 %278 = fmul float %273, %28 %279 = fadd float %278, %27 %280 = call float @llvm.maxnum.f32(float %196, float 0.000000e+00) %281 = fadd float %280, 0xBEB0C6F7A0000000 %282 = call float @fabs(float %280) %283 = call float @llvm.pow.f32(float %282, float %40) %284 = call float @llvm.AMDGPU.cndlt(float %281, float 0.000000e+00, float %283) %285 = bitcast float %86 to i32 %286 = bitcast float %87 to i32 %287 = insertelement <2 x i32> undef, i32 %285, i32 0 %288 = insertelement <2 x i32> %287, i32 %286, i32 1 %289 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %288, <32 x i8> %68, <16 x i8> %71, i32 2) %290 = extractelement <4 x float> %289, i32 0 %291 = extractelement <4 x float> %289, i32 1 %292 = extractelement <4 x float> %289, i32 2 %293 = fmul float %290, %32 %294 = fadd float %293, %29 %295 = fmul float %291, %32 %296 = fadd float %295, %30 %297 = fmul float %292, %32 %298 = fadd float %297, %31 %299 = fmul float %143, %114 %300 = fmul float %144, %115 %301 = fadd float %300, %299 %302 = fmul float %145, %116 %303 = fadd float %301, %302 %304 = call float @llvm.AMDIL.clamp.(float %303, float 0.000000e+00, float 1.000000e+00) %305 = fadd float %304, 0xBEB0C6F7A0000000 %306 = fmul float %155, %114 %307 = fmul float %157, %115 %308 = fadd float %307, %306 %309 = fmul float %159, %116 %310 = fadd float %308, %309 %311 = call float @llvm.AMDIL.clamp.(float %310, float 0.000000e+00, float 1.000000e+00) %312 = fadd float %311, 0xBEB0C6F7A0000000 %313 = call float @fabs(float %311) %314 = call float @llvm.pow.f32(float %313, float %36) %315 = fadd float %36, 8.000000e+00 %316 = fmul float %315, %314 %317 = fmul float %316, 0x3FA45F3060000000 %318 = fmul float %304, %275 %319 = fmul float %304, %277 %320 = fmul float %304, %279 %321 = call float @llvm.AMDGPU.cndlt(float %305, float 0.000000e+00, float %318) %322 = call float @llvm.AMDGPU.cndlt(float %305, float 0.000000e+00, float %319) %323 = call float @llvm.AMDGPU.cndlt(float %305, float 0.000000e+00, float %320) %324 = fmul float %294, %317 %325 = fmul float %296, %317 %326 = fmul float %298, %317 %327 = call float @llvm.AMDGPU.cndlt(float %312, float 0.000000e+00, float %324) %328 = call float @llvm.AMDGPU.cndlt(float %312, float 0.000000e+00, float %325) %329 = call float @llvm.AMDGPU.cndlt(float %312, float 0.000000e+00, float %326) %330 = fadd float %327, %321 %331 = fadd float %328, %322 %332 = fadd float %329, %323 %333 = fmul float %284, %330 %334 = fmul float %284, %331 %335 = fmul float %284, %332 %336 = fmul float %temp16.0, %333 %337 = fmul float %temp17.0, %334 %338 = fmul float %temp18.0, %335 %339 = fmul float %336, %37 %340 = fmul float %337, %38 %341 = fmul float %338, %39 %342 = fmul float %195, %339 %343 = fmul float %195, %340 %344 = fmul float %195, %341 %345 = fsub float -0.000000e+00, %201 %346 = call float @llvm.AMDGPU.cndlt(float %345, float %342, float 0.000000e+00) %347 = fsub float -0.000000e+00, %201 %348 = call float @llvm.AMDGPU.cndlt(float %347, float %343, float 0.000000e+00) %349 = fsub float -0.000000e+00, %201 %350 = call float @llvm.AMDGPU.cndlt(float %349, float %344, float 0.000000e+00) %351 = call i32 @llvm.SI.packf16(float %346, float %348) %352 = bitcast i32 %351 to float %353 = call i32 @llvm.SI.packf16(float %350, float 0.000000e+00) %354 = bitcast i32 %353 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %352, float %354, float %352, float %354) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_movk_i32 s0, 0xf00 ; B0000F00 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_mov_b32 m0, s9 ; BEFC0309 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v7, -1.0, 1.0, vcc ; D2000007 01A9E4F3 v_mov_b32_e32 v11, 0xb58637bd ; 7E1602FF B58637BD v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000 v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001 v_interp_p1_f32 v13, v0, 1, 0, [m0] ; C8340100 v_interp_p2_f32 v13, [v13], v1, 1, 0, [m0] ; C8350101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s32, s[12:15], s0 ; C2100C00 s_buffer_load_dword s8, s[12:15], 0x0 ; C2040D00 s_buffer_load_dword s2, s[12:15], 0x10 ; C2010D10 s_buffer_load_dword s3, s[12:15], 0x11 ; C2018D11 s_buffer_load_dword s1, s[12:15], 0x12 ; C2008D12 s_buffer_load_dword s33, s[12:15], 0x13 ; C2108D13 s_buffer_load_dword s0, s[12:15], 0x22 ; C2000D22 s_buffer_load_dword s9, s[12:15], 0x28 ; C2048D28 s_buffer_load_dword s10, s[12:15], 0x29 ; C2050D29 s_buffer_load_dword s11, s[12:15], 0x2a ; C2058D2A s_buffer_load_dword s34, s[12:15], 0x2c ; C2110D2C s_buffer_load_dword s35, s[12:15], 0x2d ; C2118D2D s_buffer_load_dword s36, s[12:15], 0x24 ; C2120D24 s_buffer_load_dword s37, s[12:15], 0x25 ; C2128D25 s_buffer_load_dword s38, s[12:15], 0x26 ; C2130D26 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v3, v0, 0, 2, [m0] ; C80C0800 v_interp_p2_f32 v3, [v3], v1, 0, 2, [m0] ; C80D0801 v_interp_p1_f32 v4, v0, 1, 2, [m0] ; C8100900 v_interp_p2_f32 v4, [v4], v1, 1, 2, [m0] ; C8110901 v_interp_p1_f32 v2, v0, 2, 2, [m0] ; C8080A00 v_interp_p2_f32 v2, [v2], v1, 2, 2, [m0] ; C8090A01 v_interp_p1_f32 v14, v0, 0, 3, [m0] ; C8380C00 v_interp_p2_f32 v14, [v14], v1, 0, 3, [m0] ; C8390C01 v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00 v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01 v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00 v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01 v_interp_p1_f32 v8, v0, 0, 4, [m0] ; C8201000 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_interp_p2_f32 v8, [v8], v1, 0, 4, [m0] ; C8211001 v_interp_p1_f32 v9, v0, 1, 4, [m0] ; C8241100 v_interp_p2_f32 v9, [v9], v1, 1, 4, [m0] ; C8251101 s_load_dwordx8 s[48:55], s[6:7], 0x28 ; C0D80728 s_load_dwordx4 s[56:59], s[4:5], 0x14 ; C09C0514 v_interp_p1_f32 v10, v0, 2, 4, [m0] ; C8281200 v_interp_p2_f32 v10, [v10], v1, 2, 4, [m0] ; C8291201 s_load_dwordx4 s[20:23], s[4:5], 0x10 ; C08A0510 s_load_dwordx8 s[24:31], s[6:7], 0x20 ; C0CC0720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[40:47], s[16:19] ; F0800700 008A1105 v_mul_f32_e32 v20, v14, v14 ; 10281D0E v_mad_f32 v20, v15, v15, v20 ; D2820014 04521F0F v_mad_f32 v23, v16, v16, v20 ; D2820017 04522110 v_rsq_clamp_f32_e64 v20, |v23| ; D3580114 00000117 image_sample v12, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[48:55], s[56:59] ; F0800100 01CC0C0C s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v12, s36, v12 ; 06181824 v_mul_f32_e32 v12, s37, v12 ; 10181825 v_min_f32_e32 v13, 0x7f7fffff, v20 ; 1E1A28FF 7F7FFFFF v_mul_f32_e32 v14, v14, v13 ; 101C1B0E v_mul_f32_e32 v14, s9, v14 ; 101C1C09 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_and_b32_e32 v20, 0x7fffffff, v12 ; 362818FF 7FFFFFFF v_log_f32_e32 v20, v20 ; 7E284F14 v_mul_f32_e32 v15, v15, v13 ; 101E1B0F v_mad_f32 v14, -s10, v15, -v14 ; D282000E A43A1E0A v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_mad_f32 v14, -s11, v13, v14 ; D282000E 243A1A0B v_mul_legacy_f32_e32 v13, s38, v20 ; 0E1A2826 v_add_f32_e32 v11, v12, v11 ; 0616170C v_exp_f32_e32 v12, v13 ; 7E184B0D v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v13, v12, 0, vcc ; D200000D 01A9010C v_mad_f32 v20, 2.0, v17, -1.0 ; D2820014 03CE22F4 v_mad_f32 v21, 2.0, v18, -1.0 ; D2820015 03CE24F4 v_mad_f32 v22, 2.0, v19, -1.0 ; D2820016 03CE26F4 v_subrev_f32_e32 v11, s34, v14 ; 0A161C22 v_mul_f32_e32 v11, s35, v11 ; 10161623 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_sub_f32_e32 v14, 1.0, v23 ; 081C2EF2 v_mul_f32_e32 v12, v14, v13 ; 10181B0E v_mul_f32_e32 v12, v12, v11 ; 1018170C v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v15, 0, -1.0, vcc ; D200000F 01A9E680 v_cndmask_b32_e64 v15, v15, -1.0, vcc ; D200000F 01A9E70F v_cndmask_b32_e64 v15, v15, -1.0, vcc ; D200000F 01A9E70F v_cndmask_b32_e64 v16, v15, -1.0, vcc ; D2000010 01A9E70F v_mul_f32_e32 v15, v8, v8 ; 101E1108 v_mad_f32 v15, v9, v9, v15 ; D282000F 043E1309 v_mad_f32 v15, v10, v10, v15 ; D282000F 043E150A v_rsq_clamp_f32_e32 v23, v15 ; 7E2E590F v_mul_f32_e32 v15, v3, v3 ; 101E0703 v_mad_f32 v15, v4, v4, v15 ; D282000F 043E0904 v_mad_f32 v15, v2, v2, v15 ; D282000F 043E0502 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v17, v20, v20 ; 10222914 v_mad_f32 v17, v21, v21, v17 ; D2820011 04462B15 v_mad_f32 v17, v22, v22, v17 ; D2820011 04462D16 v_rsq_clamp_f32_e32 v24, v17 ; 7E305911 v_cmpx_le_f32_e32 vcc, 0, v16 ; 7C262080 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 image_sample v16, 1, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[24:31], s[20:23] ; F0800100 00A61005 v_mov_b32_e32 v17, 0xbeaaa64c ; 7E2202FF BEAAA64C s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v16, v16, v17 ; 06202310 s_buffer_load_dword s16, s[12:15], 0x23 ; C2080D23 s_buffer_load_dword s19, s[12:15], 0x14 ; C2098D14 s_buffer_load_dword s18, s[12:15], 0x15 ; C2090D15 s_buffer_load_dword s17, s[12:15], 0x16 ; C2088D16 s_buffer_load_dword s23, s[12:15], 0x17 ; C20B8D17 s_buffer_load_dword s22, s[12:15], 0x18 ; C20B0D18 s_buffer_load_dword s20, s[12:15], 0x19 ; C20A0D19 s_buffer_load_dword s21, s[12:15], 0x1a ; C20A8D1A s_buffer_load_dword s11, s[12:15], 0x1c ; C2058D1C s_buffer_load_dword s10, s[12:15], 0x20 ; C2050D20 s_buffer_load_dword s9, s[12:15], 0x21 ; C2048D21 v_xor_b32_e32 v17, 0x80000000, v12 ; 3A2218FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v17 ; 7C082280 v_cndmask_b32_e64 v16, 0, v16, vcc ; D2000010 01AA2080 v_cmp_gt_f32_e32 vcc, 0, v16 ; 7C082080 v_cndmask_b32_e64 v16, 0, -1.0, vcc ; D2000010 01A9E680 v_cndmask_b32_e64 v16, v16, -1.0, vcc ; D2000010 01A9E710 v_cndmask_b32_e64 v16, v16, -1.0, vcc ; D2000010 01A9E710 v_cndmask_b32_e64 v16, v16, -1.0, vcc ; D2000010 01A9E710 v_cmpx_le_f32_e32 vcc, 0, v16 ; 7C262080 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v16, s33 ; 7E200221 v_cmp_ne_i32_e64 s[24:25], 0, s32 ; D10A0018 00004080 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v19, s23 ; 7E260217 v_mov_b32_e32 v17, v13 ; 7E22030D v_mov_b32_e32 v18, v13 ; 7E24030D s_and_saveexec_b64 s[24:25], s[24:25] ; BE982418 s_xor_b64 s[24:25], exec, s[24:25] ; 8998187E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 v_interp_p1_f32 v17, v0, 3, 5, [m0] ; C8441700 s_buffer_load_dword s23, s[12:15], 0x6 ; C20B8D06 s_buffer_load_dword s26, s[12:15], 0x7 ; C20D0D07 s_buffer_load_dword s27, s[12:15], 0x4 ; C20D8D04 s_buffer_load_dword s40, s[12:15], 0x5 ; C2140D05 v_interp_p2_f32 v17, [v17], v1, 3, 5, [m0] ; C8451701 v_interp_p1_f32 v18, v0, 1, 5, [m0] ; C8481500 v_rcp_f32_e32 v17, v17 ; 7E225511 v_interp_p2_f32 v18, [v18], v1, 1, 5, [m0] ; C8491501 v_interp_p1_f32 v0, v0, 0, 5, [m0] ; C8001400 v_interp_p2_f32 v0, [v0], v1, 0, 5, [m0] ; C8011401 v_mul_f32_e32 v0, v0, v17 ; 10002300 v_mul_f32_e32 v1, v18, v17 ; 10022312 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s26 ; 7E22021A v_mad_f32 v17, s27, v0, v17 ; D2820011 0446001B v_mov_b32_e32 v0, s23 ; 7E000217 v_mad_f32 v18, s40, v1, v0 ; D2820012 04020228 image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[28:35], s[36:39] ; F0800700 01271911 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v25, v13 ; 10221B19 v_mul_f32_e32 v18, v26, v13 ; 10241B1A v_mul_f32_e32 v13, v27, v13 ; 101A1B1B s_or_b64 exec, exec, s[24:25] ; 88FE187E v_mul_f32_e32 v0, s8, v7 ; 10000E08 v_min_f32_e32 v1, 0x7f7fffff, v23 ; 1E022EFF 7F7FFFFF v_min_f32_e32 v7, 0x7f7fffff, v24 ; 1E0E30FF 7F7FFFFF v_mul_f32_e32 v20, v7, v20 ; 10282907 v_mul_f32_e32 v21, v7, v21 ; 102A2B07 v_mul_f32_e32 v7, v7, v22 ; 100E2D07 v_mul_f32_e32 v20, v20, v0 ; 10280114 v_mul_f32_e32 v21, v21, v0 ; 102A0115 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_sub_f32_e64 v7, 1.0, s22 ; D2080007 00002CF2 s_load_dwordx4 s[32:35], s[4:5], 0x8 ; C0900508 s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[24:31], s[6:7], 0x18 ; C0CC0718 v_sub_f32_e64 v22, 1.0, s20 ; D2080016 000028F2 v_sub_f32_e64 v23, 1.0, s21 ; D2080017 00002AF2 v_mul_f32_e32 v24, v1, v8 ; 10301101 v_mul_f32_e32 v24, v24, v20 ; 10302918 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[36:43], s[32:35] ; F0800700 01091905 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, v25, v7 ; 100E0F19 v_mul_f32_e32 v22, v26, v22 ; 102C2D1A v_mul_f32_e32 v23, v27, v23 ; 102E2F1B image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[24:31], s[12:15] ; F0800700 00661905 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v19, v25, s19 ; D2820005 004E3313 v_mad_f32 v6, v26, v19, s18 ; D2820006 004A271A v_mad_f32 v19, v27, v19, s17 ; D2820013 0046271B v_mul_f32_e32 v25, v1, v9 ; 10321301 v_mad_f32 v24, v21, v25, v24 ; D2820018 04623315 v_mul_f32_e32 v25, v1, v10 ; 10321501 v_mad_f32 v24, v0, v25, v24 ; D2820018 04623300 v_mul_f32_e32 v25, v20, v24 ; 10323114 v_mad_f32 v25, v24, v20, v25 ; D2820019 04662918 v_mad_f32 v8, -v8, v1, v25 ; D2820008 24660308 v_mul_f32_e32 v25, v21, v24 ; 10323115 v_mad_f32 v25, v24, v21, v25 ; D2820019 04662B18 v_mad_f32 v9, -v9, v1, v25 ; D2820009 24660309 v_mul_f32_e32 v25, v0, v24 ; 10323100 v_mad_f32 v24, v24, v0, v25 ; D2820018 04660118 v_mad_f32 v1, -v10, v1, v24 ; D2820001 2462030A v_mad_f32 v7, v16, v7, s2 ; D2820007 000A0F10 v_mad_f32 v10, v22, v16, s3 ; D282000A 000E2116 v_mad_f32 v16, v23, v16, s1 ; D2820010 00062117 v_max_f32_e32 v14, 0, v14 ; 201C1C80 v_mov_b32_e32 v22, 0xb58637bd ; 7E2C02FF B58637BD v_add_f32_e32 v23, v22, v14 ; 062E1D16 v_cmp_gt_f32_e32 vcc, 0, v23 ; 7C082E80 v_min_f32_e32 v15, 0x7f7fffff, v15 ; 1E1E1EFF 7F7FFFFF v_mul_f32_e32 v3, v15, v3 ; 1006070F v_mul_f32_e32 v4, v15, v4 ; 1008090F v_mov_b32_e32 v23, 0x7fffffff ; 7E2E02FF 7FFFFFFF v_and_b32_e32 v14, v14, v23 ; 361C2F0E v_log_f32_e32 v14, v14 ; 7E1C4F0E v_mul_f32_e32 v20, v3, v20 ; 10282903 v_mad_f32 v20, v21, v4, v20 ; D2820014 04520915 v_mul_f32_e32 v3, v3, v8 ; 10061103 v_mul_legacy_f32_e32 v8, s16, v14 ; 0E101C10 v_exp_f32_e32 v8, v8 ; 7E104B08 v_cndmask_b32_e64 v8, v8, 0, vcc ; D2000008 01A90108 v_mad_f32 v3, v9, v4, v3 ; D2820003 040E0909 v_mul_f32_e32 v2, v15, v2 ; 1004050F v_mad_f32 v0, v0, v2, v20 ; D2820000 04520500 v_mad_f32 v1, v1, v2, v3 ; D2820001 040E0501 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v2, v7, v0 ; 10040107 v_mul_f32_e32 v3, v10, v0 ; 1006010A v_mul_f32_e32 v4, v16, v0 ; 10080110 v_add_f32_e32 v0, v22, v0 ; 06000116 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_and_b32_e32 v7, v1, v23 ; 360E2F01 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v2, 0, vcc ; D2000000 01A90102 v_cndmask_b32_e64 v2, v3, 0, vcc ; D2000002 01A90103 v_cndmask_b32_e64 v3, v4, 0, vcc ; D2000003 01A90104 v_add_f32_e32 v1, v22, v1 ; 06020316 v_mul_legacy_f32_e32 v4, s11, v7 ; 0E080E0B v_mov_b32_e32 v7, 0x41000000 ; 7E0E02FF 41000000 v_add_f32_e32 v7, s11, v7 ; 060E0E0B v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mul_f32_e32 v4, 0x3d22f983, v4 ; 100808FF 3D22F983 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 v_mul_f32_e32 v6, v4, v6 ; 100C0D04 v_mul_f32_e32 v4, v4, v19 ; 10082704 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v5, 0, vcc ; D2000001 01A90105 v_cndmask_b32_e64 v5, v6, 0, vcc ; D2000005 01A90106 v_cndmask_b32_e64 v4, v4, 0, vcc ; D2000004 01A90104 v_add_f32_e32 v0, v0, v1 ; 06000300 v_add_f32_e32 v1, v2, v5 ; 06020B02 v_add_f32_e32 v2, v3, v4 ; 06040903 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_mul_f32_e32 v1, v1, v8 ; 10021101 v_mul_f32_e32 v2, v2, v8 ; 10041102 v_mul_f32_e32 v0, v0, v17 ; 10002300 v_mul_f32_e32 v1, v1, v18 ; 10022501 v_mul_f32_e32 v2, v2, v13 ; 10041B02 v_mul_f32_e32 v0, s10, v0 ; 1000000A v_mul_f32_e32 v1, s9, v1 ; 10020209 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mul_f32_e32 v0, v0, v11 ; 10001700 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_xor_b32_e32 v3, 0x80000000, v12 ; 3A0618FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cndmask_b32_e64 v2, 0, v2, vcc ; D2000002 01AA0480 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e64 v1, v2, 0 ; D25E0001 00010102 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 28 Code Size: 1496 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], TEXCOORD[0] DCL OUT[3], TEXCOORD[5] DCL OUT[4], TEXCOORD[6] DCL OUT[5].xyz, TEXCOORD[7] DCL CONST[0..12] DCL TEMP[0..5] IMM[0] FLT32 { 0.0078, -1.0000, 0.0000, 1.0000} 0: MAD TEMP[0].xyz, IN[1].yzxw, IMM[0].xxxx, IMM[0].yyyy 1: MAD TEMP[1], IN[2], IMM[0].xxxx, IMM[0].yyyy 2: MUL TEMP[2].xyz, TEMP[0], TEMP[1].zxyw 3: MAD TEMP[0].xyz, TEMP[1].yzxw, TEMP[0].yzxw, -TEMP[2] 4: MUL TEMP[0].xyz, TEMP[1].wwww, TEMP[0] 5: MUL TEMP[2].xyz, TEMP[1].yzxw, TEMP[0].zxyw 6: MAD TEMP[2].xyz, TEMP[0].yzxw, TEMP[1].zxyw, -TEMP[2] 7: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[2] 8: MUL TEMP[3], CONST[7], IN[0].yyyy 9: MAD TEMP[3], CONST[6], IN[0].xxxx, TEMP[3] 10: MAD TEMP[3], CONST[8], IN[0].zzzz, TEMP[3] 11: MAD TEMP[3], CONST[9], IN[0].wwww, TEMP[3] 12: MAD TEMP[4].xyz, TEMP[3], -CONST[4].wwww, CONST[4] 13: MUL TEMP[5].xyz, TEMP[4].yyyy, CONST[11] 14: MAD TEMP[4].xyw, CONST[10].xyzz, TEMP[4].xxxx, TEMP[5].xyzz 15: MAD TEMP[4].xyz, CONST[12], TEMP[4].zzzz, TEMP[4].xyww 16: DP3 OUT[4].x, TEMP[2], TEMP[4] 17: DP3 OUT[5].x, TEMP[2], CONST[12] 18: DP3 OUT[4].y, TEMP[0], TEMP[4] 19: DP3 OUT[5].y, TEMP[0], CONST[12] 20: DP3 OUT[4].z, TEMP[1], TEMP[4] 21: DP3 OUT[5].z, TEMP[1], CONST[12] 22: MOV OUT[1], IN[3] 23: MOV OUT[2], IMM[0].zzzz 24: MUL TEMP[0], TEMP[3].yyyy, CONST[1] 25: MAD TEMP[0], CONST[0], TEMP[3].xxxx, TEMP[0] 26: MAD TEMP[0], CONST[2], TEMP[3].zzzz, TEMP[0] 27: MAD TEMP[0], CONST[3], TEMP[3].wwww, TEMP[0] 28: MOV OUT[3], TEMP[0] 29: MOV OUT[0], TEMP[0] 30: MOV OUT[4].w, IMM[0].wwww 31: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 %60 = add i32 %5, %7 %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %60) %62 = extractelement <4 x float> %61, i32 0 %63 = extractelement <4 x float> %61, i32 1 %64 = extractelement <4 x float> %61, i32 2 %65 = extractelement <4 x float> %61, i32 3 %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = add i32 %5, %7 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = extractelement <4 x float> %76, i32 3 %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = fmul float %71, 0x3F80101020000000 %90 = fadd float %89, -1.000000e+00 %91 = fmul float %72, 0x3F80101020000000 %92 = fadd float %91, -1.000000e+00 %93 = fmul float %70, 0x3F80101020000000 %94 = fadd float %93, -1.000000e+00 %95 = fmul float %77, 0x3F80101020000000 %96 = fadd float %95, -1.000000e+00 %97 = fmul float %78, 0x3F80101020000000 %98 = fadd float %97, -1.000000e+00 %99 = fmul float %79, 0x3F80101020000000 %100 = fadd float %99, -1.000000e+00 %101 = fmul float %80, 0x3F80101020000000 %102 = fadd float %101, -1.000000e+00 %103 = fmul float %90, %100 %104 = fmul float %92, %96 %105 = fmul float %94, %98 %106 = fmul float %98, %92 %107 = fsub float %106, %103 %108 = fmul float %100, %94 %109 = fsub float %108, %104 %110 = fmul float %96, %90 %111 = fsub float %110, %105 %112 = fmul float %102, %107 %113 = fmul float %102, %109 %114 = fmul float %102, %111 %115 = fmul float %98, %114 %116 = fmul float %100, %112 %117 = fmul float %96, %113 %118 = fmul float %113, %100 %119 = fsub float %118, %115 %120 = fmul float %114, %96 %121 = fsub float %120, %116 %122 = fmul float %112, %98 %123 = fsub float %122, %117 %124 = fmul float %102, %119 %125 = fmul float %102, %121 %126 = fmul float %102, %123 %127 = fmul float %37, %63 %128 = fmul float %38, %63 %129 = fmul float %39, %63 %130 = fmul float %40, %63 %131 = fmul float %33, %62 %132 = fadd float %131, %127 %133 = fmul float %34, %62 %134 = fadd float %133, %128 %135 = fmul float %35, %62 %136 = fadd float %135, %129 %137 = fmul float %36, %62 %138 = fadd float %137, %130 %139 = fmul float %41, %64 %140 = fadd float %139, %132 %141 = fmul float %42, %64 %142 = fadd float %141, %134 %143 = fmul float %43, %64 %144 = fadd float %143, %136 %145 = fmul float %44, %64 %146 = fadd float %145, %138 %147 = fmul float %45, %65 %148 = fadd float %147, %140 %149 = fmul float %46, %65 %150 = fadd float %149, %142 %151 = fmul float %47, %65 %152 = fadd float %151, %144 %153 = fmul float %48, %65 %154 = fadd float %153, %146 %155 = fmul float %32, %148 %156 = fsub float %29, %155 %157 = fmul float %32, %150 %158 = fsub float %30, %157 %159 = fmul float %32, %152 %160 = fsub float %31, %159 %161 = fmul float %158, %52 %162 = fmul float %158, %53 %163 = fmul float %158, %54 %164 = fmul float %49, %156 %165 = fadd float %164, %161 %166 = fmul float %50, %156 %167 = fadd float %166, %162 %168 = fmul float %51, %156 %169 = fadd float %168, %163 %170 = fmul float %55, %160 %171 = fadd float %170, %165 %172 = fmul float %56, %160 %173 = fadd float %172, %167 %174 = fmul float %57, %160 %175 = fadd float %174, %169 %176 = fmul float %124, %171 %177 = fmul float %125, %173 %178 = fadd float %177, %176 %179 = fmul float %126, %175 %180 = fadd float %178, %179 %181 = fmul float %124, %55 %182 = fmul float %125, %56 %183 = fadd float %182, %181 %184 = fmul float %126, %57 %185 = fadd float %183, %184 %186 = fmul float %112, %171 %187 = fmul float %113, %173 %188 = fadd float %187, %186 %189 = fmul float %114, %175 %190 = fadd float %188, %189 %191 = fmul float %112, %55 %192 = fmul float %113, %56 %193 = fadd float %192, %191 %194 = fmul float %114, %57 %195 = fadd float %193, %194 %196 = fmul float %96, %171 %197 = fmul float %98, %173 %198 = fadd float %197, %196 %199 = fmul float %100, %175 %200 = fadd float %198, %199 %201 = fmul float %96, %55 %202 = fmul float %98, %56 %203 = fadd float %202, %201 %204 = fmul float %100, %57 %205 = fadd float %203, %204 %206 = fmul float %150, %17 %207 = fmul float %150, %18 %208 = fmul float %150, %19 %209 = fmul float %150, %20 %210 = fmul float %13, %148 %211 = fadd float %210, %206 %212 = fmul float %14, %148 %213 = fadd float %212, %207 %214 = fmul float %15, %148 %215 = fadd float %214, %208 %216 = fmul float %16, %148 %217 = fadd float %216, %209 %218 = fmul float %21, %152 %219 = fadd float %218, %211 %220 = fmul float %22, %152 %221 = fadd float %220, %213 %222 = fmul float %23, %152 %223 = fadd float %222, %215 %224 = fmul float %24, %152 %225 = fadd float %224, %217 %226 = fmul float %25, %154 %227 = fadd float %226, %219 %228 = fmul float %26, %154 %229 = fadd float %228, %221 %230 = fmul float %27, %154 %231 = fadd float %230, %223 %232 = fmul float %28, %154 %233 = fadd float %232, %225 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %85, float %86, float %87, float %88) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %227, float %229, float %231, float %233) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %180, float %190, float %200, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %185, float %195, float %205, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %227, float %229, float %231, float %233) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x3c008081 ; 7E0202FF 3C008081 v_mov_b32_e32 v2, 0 ; 7E040280 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[0:3], 0x1d ; C20A011D buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 buffer_load_format_xyzw v[15:18], v0, s[8:11], 0 idxen ; E00C2000 80020F00 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F s_buffer_load_dword s6, s[0:3], 0x20 ; C2030120 s_buffer_load_dword s7, s[0:3], 0x21 ; C2038121 s_buffer_load_dword s8, s[0:3], 0x18 ; C2040118 s_buffer_load_dword s9, s[0:3], 0x19 ; C2048119 s_buffer_load_dword s10, s[0:3], 0x1a ; C205011A s_buffer_load_dword s11, s[0:3], 0x1b ; C205811B s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C s_buffer_load_dword s13, s[0:3], 0x22 ; C2068122 s_buffer_load_dword s14, s[0:3], 0x23 ; C2070123 s_buffer_load_dword s15, s[0:3], 0x24 ; C2078124 s_buffer_load_dword s16, s[0:3], 0x25 ; C2080125 s_buffer_load_dword s17, s[0:3], 0x26 ; C2088126 s_buffer_load_dword s18, s[0:3], 0xf ; C209010F s_buffer_load_dword s19, s[0:3], 0x10 ; C2098110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x12 ; C20B0112 s_buffer_load_dword s23, s[0:3], 0x13 ; C20B8113 s_buffer_load_dword s24, s[0:3], 0x27 ; C20C0127 s_buffer_load_dword s25, s[0:3], 0x28 ; C20C8128 s_buffer_load_dword s26, s[0:3], 0x29 ; C20D0129 s_buffer_load_dword s27, s[0:3], 0x2a ; C20D812A s_buffer_load_dword s28, s[0:3], 0x2c ; C20E012C s_buffer_load_dword s29, s[0:3], 0x0 ; C20E8100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s19 ; 7E000213 s_buffer_load_dword s19, s[0:3], 0x1 ; C2098101 v_mov_b32_e32 v19, s21 ; 7E260215 s_buffer_load_dword s21, s[0:3], 0x2 ; C20A8102 v_mov_b32_e32 v20, s22 ; 7E280216 s_buffer_load_dword s22, s[0:3], 0x3 ; C20B0103 s_buffer_load_dword s30, s[0:3], 0x4 ; C20F0104 s_buffer_load_dword s31, s[0:3], 0x5 ; C20F8105 s_buffer_load_dword s32, s[0:3], 0x6 ; C2100106 s_buffer_load_dword s33, s[0:3], 0x7 ; C2108107 s_buffer_load_dword s34, s[0:3], 0x8 ; C2110108 s_buffer_load_dword s35, s[0:3], 0x9 ; C2118109 s_buffer_load_dword s36, s[0:3], 0x2d ; C212012D s_buffer_load_dword s37, s[0:3], 0x2e ; C212812E s_buffer_load_dword s38, s[0:3], 0x30 ; C2130130 s_buffer_load_dword s39, s[0:3], 0x31 ; C2138131 s_buffer_load_dword s40, s[0:3], 0x32 ; C2140132 s_buffer_load_dword s41, s[0:3], 0xa ; C214810A s_buffer_load_dword s42, s[0:3], 0xb ; C215010B s_buffer_load_dword s43, s[0:3], 0xc ; C215810C s_buffer_load_dword s44, s[0:3], 0xd ; C216010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E v_mul_f32_e32 v21, s12, v5 ; 102A0A0C v_mad_f32 v9, v9, v1, -1.0 ; D2820009 03CE0309 v_mad_f32 v10, v10, v1, -1.0 ; D282000A 03CE030A v_mad_f32 v8, v8, v1, -1.0 ; D2820008 03CE0308 v_mad_f32 v11, v11, v1, -1.0 ; D282000B 03CE030B v_mad_f32 v12, v12, v1, -1.0 ; D282000C 03CE030C v_mad_f32 v13, v13, v1, -1.0 ; D282000D 03CE030D v_mad_f32 v1, v14, v1, -1.0 ; D2820001 03CE030E v_mad_f32 v14, s8, v4, v21 ; D282000E 04560808 v_mul_f32_e32 v21, s20, v5 ; 102A0A14 v_mad_f32 v21, s9, v4, v21 ; D2820015 04560809 v_mul_f32_e32 v22, s4, v5 ; 102C0A04 v_mad_f32 v22, s10, v4, v22 ; D2820016 045A080A v_mul_f32_e32 v5, s5, v5 ; 100A0A05 v_mad_f32 v4, s11, v4, v5 ; D2820004 0416080B v_mad_f32 v5, s6, v6, v14 ; D2820005 043A0C06 v_mad_f32 v14, s7, v6, v21 ; D282000E 04560C07 v_mad_f32 v21, s13, v6, v22 ; D2820015 045A0C0D v_mad_f32 v4, s14, v6, v4 ; D2820004 04120C0E v_mad_f32 v5, s15, v7, v5 ; D2820005 04160E0F v_mad_f32 v6, s16, v7, v14 ; D2820006 043A0E10 v_mad_f32 v14, s17, v7, v21 ; D282000E 04560E11 v_mad_f32 v4, s24, v7, v4 ; D2820004 04120E18 exp 15, 32, 0, 0, 0, v15, v16, v17, v18 ; F800020F 1211100F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s30, v6 ; 100E0C1E v_mad_f32 v7, s29, v5, v7 ; D2820007 041E0A1D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v15, s31, v6 ; 101E0C1F v_mad_f32 v15, s19, v5, v15 ; D282000F 043E0A13 v_mul_f32_e32 v16, s32, v6 ; 10200C20 v_mad_f32 v16, s21, v5, v16 ; D2820010 04420A15 v_mul_f32_e32 v17, s33, v6 ; 10220C21 v_mad_f32 v17, s22, v5, v17 ; D2820011 04460A16 v_mad_f32 v0, -s23, v5, v0 ; D2820000 24020A17 v_mad_f32 v5, -s23, v6, v19 ; D2820005 244E0C17 v_mul_f32_e32 v6, s28, v5 ; 100C0A1C v_mul_f32_e32 v18, s36, v5 ; 10240A24 v_mul_f32_e32 v5, s37, v5 ; 100A0A25 v_mad_f32 v6, s25, v0, v6 ; D2820006 041A0019 v_mad_f32 v18, s26, v0, v18 ; D2820012 044A001A v_mad_f32 v0, s27, v0, v5 ; D2820000 0416001B v_mad_f32 v5, -s23, v14, v20 ; D2820005 24521C17 v_mad_f32 v7, s34, v14, v7 ; D2820007 041E1C22 v_mad_f32 v15, s35, v14, v15 ; D282000F 043E1C23 v_mad_f32 v16, s41, v14, v16 ; D2820010 04421C29 v_mad_f32 v14, s42, v14, v17 ; D282000E 04461C2A v_mad_f32 v7, s43, v4, v7 ; D2820007 041E082B v_mad_f32 v15, s44, v4, v15 ; D282000F 043E082C v_mad_f32 v16, s0, v4, v16 ; D2820010 04420800 v_mad_f32 v4, s18, v4, v14 ; D2820004 043A0812 v_mul_f32_e32 v14, v13, v9 ; 101C130D v_mad_f32 v14, v12, v10, -v14 ; D282000E 843A150C v_mul_f32_e32 v10, v11, v10 ; 1014150B v_mad_f32 v10, v13, v8, -v10 ; D282000A 842A110D v_mul_f32_e32 v8, v12, v8 ; 1010110C v_mad_f32 v8, v11, v9, -v8 ; D2820008 8422130B v_mul_f32_e32 v9, v14, v1 ; 1012030E v_mul_f32_e32 v10, v10, v1 ; 1014030A v_mul_f32_e32 v8, v8, v1 ; 10100308 v_mul_f32_e32 v14, v8, v12 ; 101C1908 v_mad_f32 v14, v10, v13, -v14 ; D282000E 843A1B0A v_mul_f32_e32 v17, v9, v13 ; 10221B09 v_mad_f32 v17, v8, v11, -v17 ; D2820011 84461708 v_mul_f32_e32 v19, v10, v11 ; 1026170A v_mad_f32 v19, v9, v12, -v19 ; D2820013 844E1909 v_mul_f32_e32 v14, v14, v1 ; 101C030E v_mul_f32_e32 v17, v17, v1 ; 10220311 v_mul_f32_e32 v1, v19, v1 ; 10020313 v_mad_f32 v6, s38, v5, v6 ; D2820006 041A0A26 v_mad_f32 v18, s39, v5, v18 ; D2820012 044A0A27 v_mad_f32 v0, s40, v5, v0 ; D2820000 04020A28 v_mul_f32_e32 v5, s38, v11 ; 100A1626 v_mul_f32_e32 v19, s38, v9 ; 10261226 v_mul_f32_e32 v20, s38, v14 ; 10281C26 v_mul_f32_e32 v14, v6, v14 ; 101C1D06 v_mul_f32_e32 v9, v6, v9 ; 10121306 v_mul_f32_e32 v6, v6, v11 ; 100C1706 v_mad_f32 v11, v10, s39, v19 ; D282000B 044C4F0A v_mad_f32 v9, v10, v18, v9 ; D2820009 0426250A v_mad_f32 v5, v12, s39, v5 ; D2820005 04144F0C v_mad_f32 v6, v12, v18, v6 ; D2820006 041A250C v_mad_f32 v10, v17, v18, v14 ; D282000A 043A2511 v_mad_f32 v10, v1, v0, v10 ; D282000A 042A0101 v_mad_f32 v9, v8, v0, v9 ; D2820009 04260108 v_mad_f32 v0, v13, v0, v6 ; D2820000 041A010D exp 15, 33, 0, 0, 0, v2, v2, v2, v2 ; F800021F 02020202 exp 15, 34, 0, 0, 0, v7, v15, v16, v4 ; F800022F 04100F07 exp 15, 35, 0, 0, 0, v10, v9, v0, v3 ; F800023F 0300090A s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, v17, s39, v20 ; D2820000 04504F11 v_mad_f32 v3, v8, s40, v11 ; D2820003 042C5108 v_mad_f32 v0, v1, s40, v0 ; D2820000 04005101 v_mad_f32 v1, v13, s40, v5 ; D2820001 0414510D exp 15, 36, 0, 0, 0, v0, v3, v1, v2 ; F800024F 02010300 exp 15, 12, 0, 1, 0, v7, v15, v16, v4 ; F80008CF 04100F07 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 24 Code Size: 920 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[5], PERSPECTIVE DCL IN[1], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0..9] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..3] IMM[0] FLT32 { 1.0000, 0.5000, -0.5000, 0.0039} IMM[1] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[1], IN[1] 1: RSQ TEMP[1], |TEMP[0].xxxx| 2: MIN TEMP[0].x, IMM[1].xxxx, TEMP[1] 3: MUL TEMP[0].x, TEMP[0].xxxx, IN[1].zzzz 4: MAD TEMP[0].xy, TEMP[0].xxxx, IMM[0].yzzw, IMM[0].yyyy 5: MUL TEMP[0].xy, TEMP[0], TEMP[0] 6: MOV TEMP[2].x, IMM[0].xxxx 7: ADD TEMP[2].xyz, TEMP[2].xxxx, -CONST[0] 8: MUL TEMP[3].xyz, CONST[6], CONST[6].wwww 9: MUL TEMP[2].xyz, TEMP[2], TEMP[3] 10: MAD TEMP[2].xyz, TEMP[2], CONST[4].wwww, CONST[4] 11: MUL TEMP[0].yzw, TEMP[0].yyyy, TEMP[2].xxyz 12: MUL TEMP[3].xyz, TEMP[0].xxxx, TEMP[2] 13: MUL TEMP[0].xyz, TEMP[0].yzww, CONST[8] 14: MAD TEMP[0].xyz, TEMP[3], CONST[7], TEMP[0] 15: MOV TEMP[0].w, CONST[9].wwww 16: MAD TEMP[0].xyz, TEMP[0], TEMP[0].wwww, CONST[0] 17: MAD OUT[0].xyz, TEMP[2], CONST[9], TEMP[0] 18: MUL OUT[0].w, IMM[0].wwww, IN[0].wwww 19: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %45 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %49 = fmul float %46, %46 %50 = fmul float %47, %47 %51 = fadd float %50, %49 %52 = fmul float %48, %48 %53 = fadd float %51, %52 %54 = call float @fabs(float %53) %55 = call float @llvm.AMDGPU.rsq.clamped.f32(float %54) %56 = call float @llvm.minnum.f32(float %55, float 0x47EFFFFFE0000000) %57 = fmul float %56, %48 %58 = fmul float %57, 5.000000e-01 %59 = fadd float %58, 5.000000e-01 %60 = fmul float %57, -5.000000e-01 %61 = fadd float %60, 5.000000e-01 %62 = fmul float %59, %59 %63 = fmul float %61, %61 %64 = fsub float 1.000000e+00, %24 %65 = fsub float 1.000000e+00, %25 %66 = fsub float 1.000000e+00, %26 %67 = fmul float %31, %34 %68 = fmul float %32, %34 %69 = fmul float %33, %34 %70 = fmul float %64, %67 %71 = fmul float %65, %68 %72 = fmul float %66, %69 %73 = fmul float %70, %30 %74 = fadd float %73, %27 %75 = fmul float %71, %30 %76 = fadd float %75, %28 %77 = fmul float %72, %30 %78 = fadd float %77, %29 %79 = fmul float %63, %74 %80 = fmul float %63, %76 %81 = fmul float %63, %78 %82 = fmul float %62, %74 %83 = fmul float %62, %76 %84 = fmul float %62, %78 %85 = fmul float %79, %38 %86 = fmul float %80, %39 %87 = fmul float %81, %40 %88 = fmul float %82, %35 %89 = fadd float %88, %85 %90 = fmul float %83, %36 %91 = fadd float %90, %86 %92 = fmul float %84, %37 %93 = fadd float %92, %87 %94 = fmul float %89, %44 %95 = fadd float %94, %24 %96 = fmul float %91, %44 %97 = fadd float %96, %25 %98 = fmul float %93, %44 %99 = fadd float %98, %26 %100 = fmul float %74, %41 %101 = fadd float %100, %95 %102 = fmul float %76, %42 %103 = fadd float %102, %97 %104 = fmul float %78, %43 %105 = fadd float %104, %99 %106 = fmul float %45, 3.906250e-03 %107 = call i32 @llvm.SI.packf16(float %101, float %103) %108 = bitcast i32 %107 to float %109 = call i32 @llvm.SI.packf16(float %105, float %106) %110 = bitcast i32 %109 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %108, float %110, float %108, float %110) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_buffer_load_dword s10, s[0:3], 0x12 ; C2050112 s_buffer_load_dword s11, s[0:3], 0x13 ; C2058113 s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 s_buffer_load_dword s14, s[0:3], 0x1a ; C207011A s_mov_b32 m0, s9 ; BEFC0309 s_buffer_load_dword s9, s[0:3], 0x1b ; C204811B s_buffer_load_dword s15, s[0:3], 0x1c ; C207811C s_buffer_load_dword s16, s[0:3], 0x1d ; C208011D s_buffer_load_dword s17, s[0:3], 0x1e ; C208811E s_buffer_load_dword s18, s[0:3], 0x20 ; C2090120 s_buffer_load_dword s19, s[0:3], 0x21 ; C2098121 s_buffer_load_dword s20, s[0:3], 0x22 ; C20A0122 s_buffer_load_dword s21, s[0:3], 0x24 ; C20A8124 s_buffer_load_dword s22, s[0:3], 0x25 ; C20B0125 s_buffer_load_dword s23, s[0:3], 0x26 ; C20B8126 s_buffer_load_dword s0, s[0:3], 0x27 ; C2000127 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 v_mul_f32_e32 v1, v3, v3 ; 10020703 v_mad_f32 v1, v4, v4, v1 ; D2820001 04060904 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s9 ; 7E060209 v_mul_f32_e32 v3, s12, v3 ; 1006060C v_mov_b32_e32 v4, s9 ; 7E080209 v_mul_f32_e32 v4, s13, v4 ; 1008080D v_mov_b32_e32 v5, s9 ; 7E0A0209 v_mul_f32_e32 v5, s14, v5 ; 100A0A0E v_sub_f32_e64 v6, 1.0, s4 ; D2080006 000008F2 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_sub_f32_e64 v6, 1.0, s5 ; D2080006 00000AF2 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_sub_f32_e64 v6, 1.0, s6 ; D2080006 00000CF2 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mad_f32 v1, v0, v0, v1 ; D2820001 04060100 v_rsq_clamp_f32_e64 v1, |v1| ; D3580101 00000101 v_mov_b32_e32 v6, s7 ; 7E0C0207 v_mad_f32 v3, s11, v3, v6 ; D2820003 041A060B v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v4, s11, v4, v6 ; D2820004 041A080B v_mov_b32_e32 v6, s10 ; 7E0C020A v_mad_f32 v5, s11, v5, v6 ; D2820005 041A0A0B v_min_f32_e32 v1, 0x7f7fffff, v1 ; 1E0202FF 7F7FFFFF v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mad_f32 v1, -0.5, v0, 0.5 ; D2820001 03C200F1 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mul_f32_e32 v6, v3, v1 ; 100C0303 v_mul_f32_e32 v6, s18, v6 ; 100C0C12 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v7, v3, v0 ; 100E0103 v_mad_f32 v6, v7, s15, v6 ; D2820006 04181F07 v_mul_f32_e32 v7, v4, v1 ; 100E0304 v_mul_f32_e32 v7, s19, v7 ; 100E0E13 v_mul_f32_e32 v8, v4, v0 ; 10100104 v_mad_f32 v7, v8, s16, v7 ; D2820007 041C2108 v_mul_f32_e32 v1, v5, v1 ; 10020305 v_mul_f32_e32 v1, s20, v1 ; 10020214 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mad_f32 v0, v0, s17, v1 ; D2820000 04042300 v_mov_b32_e32 v1, s4 ; 7E020204 v_mad_f32 v1, s0, v6, v1 ; D2820001 04060C00 v_mov_b32_e32 v6, s5 ; 7E0C0205 v_mad_f32 v6, s0, v7, v6 ; D2820006 041A0E00 v_mov_b32_e32 v7, s6 ; 7E0E0206 v_mad_f32 v0, s0, v0, v7 ; D2820000 041E0000 v_mad_f32 v1, v3, s21, v1 ; D2820001 04042B03 v_mad_f32 v3, v4, s22, v6 ; D2820003 04182D04 v_mad_f32 v0, v5, s23, v0 ; D2820000 04002F05 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_mul_f32_e32 v2, 0x3b800000, v2 ; 100404FF 3B800000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 440 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[5], PERSPECTIVE DCL IN[1], TEXCOORD[7], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..8] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..3] IMM[0] FLT32 { 1.0000, 0.5000, -0.5000, 0.0039} IMM[1] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[1], IN[1] 1: RSQ TEMP[1], |TEMP[0].xxxx| 2: MIN TEMP[0].x, IMM[1].xxxx, TEMP[1] 3: MUL TEMP[0].x, TEMP[0].xxxx, IN[1].zzzz 4: MAD TEMP[0].xy, TEMP[0].xxxx, IMM[0].yzzw, IMM[0].yyyy 5: MUL TEMP[0].xy, TEMP[0], TEMP[0] 6: MOV TEMP[2].x, IMM[0].xxxx 7: ADD TEMP[2].xyz, TEMP[2].xxxx, -CONST[0] 8: TEX TEMP[3], IN[2], SAMP[0], 2D 9: MUL TEMP[2].xyz, TEMP[2], TEMP[3] 10: MAD TEMP[2].xyz, TEMP[2], CONST[4].wwww, CONST[4] 11: MUL TEMP[0].yzw, TEMP[0].yyyy, TEMP[2].xxyz 12: MUL TEMP[3].xyz, TEMP[0].xxxx, TEMP[2] 13: MUL TEMP[0].xyz, TEMP[0].yzww, CONST[7] 14: MAD TEMP[0].xyz, TEMP[3], CONST[6], TEMP[0] 15: MOV TEMP[0].w, CONST[8].wwww 16: MAD TEMP[0].xyz, TEMP[0], TEMP[0].wwww, CONST[0] 17: MAD OUT[0].xyz, TEMP[2], CONST[8], TEMP[0] 18: MUL OUT[0].w, IMM[0].wwww, IN[0].wwww 19: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %41 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0 %43 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %51 = fmul float %46, %46 %52 = fmul float %47, %47 %53 = fadd float %52, %51 %54 = fmul float %48, %48 %55 = fadd float %53, %54 %56 = call float @fabs(float %55) %57 = call float @llvm.AMDGPU.rsq.clamped.f32(float %56) %58 = call float @llvm.minnum.f32(float %57, float 0x47EFFFFFE0000000) %59 = fmul float %58, %48 %60 = fmul float %59, 5.000000e-01 %61 = fadd float %60, 5.000000e-01 %62 = fmul float %59, -5.000000e-01 %63 = fadd float %62, 5.000000e-01 %64 = fmul float %61, %61 %65 = fmul float %63, %63 %66 = fsub float 1.000000e+00, %24 %67 = fsub float 1.000000e+00, %25 %68 = fsub float 1.000000e+00, %26 %69 = bitcast float %49 to i32 %70 = bitcast float %50 to i32 %71 = insertelement <2 x i32> undef, i32 %69, i32 0 %72 = insertelement <2 x i32> %71, i32 %70, i32 1 %73 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %42, <16 x i8> %44, i32 2) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = fmul float %66, %74 %78 = fmul float %67, %75 %79 = fmul float %68, %76 %80 = fmul float %77, %30 %81 = fadd float %80, %27 %82 = fmul float %78, %30 %83 = fadd float %82, %28 %84 = fmul float %79, %30 %85 = fadd float %84, %29 %86 = fmul float %65, %81 %87 = fmul float %65, %83 %88 = fmul float %65, %85 %89 = fmul float %64, %81 %90 = fmul float %64, %83 %91 = fmul float %64, %85 %92 = fmul float %86, %34 %93 = fmul float %87, %35 %94 = fmul float %88, %36 %95 = fmul float %89, %31 %96 = fadd float %95, %92 %97 = fmul float %90, %32 %98 = fadd float %97, %93 %99 = fmul float %91, %33 %100 = fadd float %99, %94 %101 = fmul float %96, %40 %102 = fadd float %101, %24 %103 = fmul float %98, %40 %104 = fadd float %103, %25 %105 = fmul float %100, %40 %106 = fadd float %105, %26 %107 = fmul float %81, %37 %108 = fadd float %107, %102 %109 = fmul float %83, %38 %110 = fadd float %109, %104 %111 = fmul float %85, %39 %112 = fadd float %111, %106 %113 = fmul float %45, 3.906250e-03 %114 = call i32 @llvm.SI.packf16(float %108, float %110) %115 = bitcast i32 %114 to float %116 = call i32 @llvm.SI.packf16(float %112, float %113) %117 = bitcast i32 %116 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %115, float %117, float %115, float %117) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[4:7] ; F0800700 00220606 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_buffer_load_dword s9, s[0:3], 0x12 ; C2048112 s_buffer_load_dword s10, s[0:3], 0x13 ; C2050113 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_buffer_load_dword s13, s[0:3], 0x1a ; C206811A s_buffer_load_dword s14, s[0:3], 0x1c ; C207011C s_buffer_load_dword s15, s[0:3], 0x1d ; C207811D s_buffer_load_dword s16, s[0:3], 0x1e ; C208011E s_buffer_load_dword s17, s[0:3], 0x20 ; C2088120 s_buffer_load_dword s18, s[0:3], 0x21 ; C2090121 s_buffer_load_dword s19, s[0:3], 0x22 ; C2098122 s_buffer_load_dword s0, s[0:3], 0x23 ; C2000123 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e64 v0, 1.0, s4 ; D2080000 000008F2 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_sub_f32_e64 v1, 1.0, s5 ; D2080001 00000AF2 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_sub_f32_e64 v6, 1.0, s6 ; D2080006 00000CF2 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v3, v4, v4, v3 ; D2820003 040E0904 v_mad_f32 v3, v5, v5, v3 ; D2820003 040E0B05 v_rsq_clamp_f32_e64 v3, |v3| ; D3580103 00000103 v_mov_b32_e32 v4, s7 ; 7E080207 v_mad_f32 v0, s10, v0, v4 ; D2820000 0412000A v_mov_b32_e32 v4, s8 ; 7E080208 v_mad_f32 v1, s10, v1, v4 ; D2820001 0412020A v_mov_b32_e32 v4, s9 ; 7E080209 v_mad_f32 v4, s10, v6, v4 ; D2820004 04120C0A v_min_f32_e32 v3, 0x7f7fffff, v3 ; 1E0606FF 7F7FFFFF v_mul_f32_e32 v3, v5, v3 ; 10060705 v_mad_f32 v5, -0.5, v3, 0.5 ; D2820005 03C206F1 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mul_f32_e32 v6, v0, v5 ; 100C0B00 v_mul_f32_e32 v6, s14, v6 ; 100C0C0E v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mul_f32_e32 v7, v0, v3 ; 100E0700 v_mad_f32 v6, v7, s11, v6 ; D2820006 04181707 v_mul_f32_e32 v7, v1, v5 ; 100E0B01 v_mul_f32_e32 v7, s15, v7 ; 100E0E0F v_mul_f32_e32 v8, v1, v3 ; 10100701 v_mad_f32 v7, v8, s12, v7 ; D2820007 041C1908 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 v_mul_f32_e32 v5, s16, v5 ; 100A0A10 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mad_f32 v3, v3, s13, v5 ; D2820003 04141B03 v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v5, s0, v6, v5 ; D2820005 04160C00 v_mov_b32_e32 v6, s5 ; 7E0C0205 v_mad_f32 v6, s0, v7, v6 ; D2820006 041A0E00 v_mov_b32_e32 v7, s6 ; 7E0E0206 v_mad_f32 v3, s0, v3, v7 ; D2820003 041E0600 v_mad_f32 v0, v0, s17, v5 ; D2820000 04142300 v_mad_f32 v1, v1, s18, v6 ; D2820001 04182501 v_mad_f32 v3, v4, s19, v3 ; D2820003 040C2704 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_mul_f32_e32 v1, 0x3b800000, v2 ; 100204FF 3B800000 v_cvt_pkrtz_f16_f32_e32 v1, v3, v1 ; 5E020303 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 436 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], TEXCOORD[0] DCL OUT[3].xyz, TEXCOORD[4] DCL OUT[4], TEXCOORD[5] DCL OUT[5], TEXCOORD[6] DCL OUT[6], TEXCOORD[7] DCL CONST[0..13] DCL TEMP[0..4] IMM[0] FLT32 { 0.0078, -1.0000, 0.0000, 1.0000} 0: MUL TEMP[0], CONST[7], IN[0].yyyy 1: MAD TEMP[0], CONST[6], IN[0].xxxx, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[9], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 5: MAD TEMP[1], CONST[0], TEMP[0].xxxx, TEMP[1] 6: MAD TEMP[1], CONST[2], TEMP[0].zzzz, TEMP[1] 7: MAD OUT[0], CONST[3], TEMP[0].wwww, TEMP[1] 8: MAD TEMP[1].xyz, TEMP[0], -CONST[4].wwww, CONST[4] 9: MOV OUT[6], TEMP[0] 10: MUL TEMP[0].xyz, TEMP[1].yyyy, CONST[11] 11: MAD TEMP[0].xyz, CONST[10], TEMP[1].xxxx, TEMP[0] 12: MAD TEMP[0].xyz, CONST[12], TEMP[1].zzzz, TEMP[0] 13: MAD TEMP[1].xyz, IN[1].yzxw, IMM[0].xxxx, IMM[0].yyyy 14: MAD TEMP[2], IN[2], IMM[0].xxxx, IMM[0].yyyy 15: MUL TEMP[3].xyz, TEMP[1], TEMP[2].zxyw 16: MAD TEMP[1].xyz, TEMP[2].yzxw, TEMP[1].yzxw, -TEMP[3] 17: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 18: MUL TEMP[3].xyz, TEMP[2].yzxw, TEMP[1].zxyw 19: MAD TEMP[3].xyz, TEMP[1].yzxw, TEMP[2].zxyw, -TEMP[3] 20: MUL TEMP[3].xyz, TEMP[2].wwww, TEMP[3] 21: DP3 OUT[5].x, TEMP[3], TEMP[0] 22: DP3 OUT[5].y, TEMP[1], TEMP[0] 23: DP3 OUT[5].z, TEMP[2], TEMP[0] 24: MOV TEMP[0].xyz, CONST[13] 25: MUL TEMP[4].xyz, TEMP[0].yyyy, CONST[11] 26: MAD TEMP[0].xyw, CONST[10].xyzz, TEMP[0].xxxx, TEMP[4].xyzz 27: MAD TEMP[0].xyz, CONST[12], TEMP[0].zzzz, TEMP[0].xyww 28: DP3 OUT[3].x, TEMP[3], TEMP[0] 29: DP3 OUT[3].y, TEMP[1], TEMP[0] 30: DP3 OUT[3].z, TEMP[2], TEMP[0] 31: MOV OUT[1], IN[3] 32: MOV OUT[2], IMM[0].zzzz 33: MOV OUT[4], IMM[0].zzzz 34: MOV OUT[5].w, IMM[0].wwww 35: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = add i32 %5, %7 %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %62, i32 0, i32 %63) %65 = extractelement <4 x float> %64, i32 0 %66 = extractelement <4 x float> %64, i32 1 %67 = extractelement <4 x float> %64, i32 2 %68 = extractelement <4 x float> %64, i32 3 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = add i32 %5, %7 %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %78) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = extractelement <4 x float> %79, i32 3 %84 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0 %86 = add i32 %5, %7 %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %86) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = extractelement <4 x float> %87, i32 2 %91 = extractelement <4 x float> %87, i32 3 %92 = fmul float %37, %66 %93 = fmul float %38, %66 %94 = fmul float %39, %66 %95 = fmul float %40, %66 %96 = fmul float %33, %65 %97 = fadd float %96, %92 %98 = fmul float %34, %65 %99 = fadd float %98, %93 %100 = fmul float %35, %65 %101 = fadd float %100, %94 %102 = fmul float %36, %65 %103 = fadd float %102, %95 %104 = fmul float %41, %67 %105 = fadd float %104, %97 %106 = fmul float %42, %67 %107 = fadd float %106, %99 %108 = fmul float %43, %67 %109 = fadd float %108, %101 %110 = fmul float %44, %67 %111 = fadd float %110, %103 %112 = fmul float %45, %68 %113 = fadd float %112, %105 %114 = fmul float %46, %68 %115 = fadd float %114, %107 %116 = fmul float %47, %68 %117 = fadd float %116, %109 %118 = fmul float %48, %68 %119 = fadd float %118, %111 %120 = fmul float %115, %17 %121 = fmul float %115, %18 %122 = fmul float %115, %19 %123 = fmul float %115, %20 %124 = fmul float %13, %113 %125 = fadd float %124, %120 %126 = fmul float %14, %113 %127 = fadd float %126, %121 %128 = fmul float %15, %113 %129 = fadd float %128, %122 %130 = fmul float %16, %113 %131 = fadd float %130, %123 %132 = fmul float %21, %117 %133 = fadd float %132, %125 %134 = fmul float %22, %117 %135 = fadd float %134, %127 %136 = fmul float %23, %117 %137 = fadd float %136, %129 %138 = fmul float %24, %117 %139 = fadd float %138, %131 %140 = fmul float %25, %119 %141 = fadd float %140, %133 %142 = fmul float %26, %119 %143 = fadd float %142, %135 %144 = fmul float %27, %119 %145 = fadd float %144, %137 %146 = fmul float %28, %119 %147 = fadd float %146, %139 %148 = fmul float %32, %113 %149 = fsub float %29, %148 %150 = fmul float %32, %115 %151 = fsub float %30, %150 %152 = fmul float %32, %117 %153 = fsub float %31, %152 %154 = fmul float %151, %52 %155 = fmul float %151, %53 %156 = fmul float %151, %54 %157 = fmul float %49, %149 %158 = fadd float %157, %154 %159 = fmul float %50, %149 %160 = fadd float %159, %155 %161 = fmul float %51, %149 %162 = fadd float %161, %156 %163 = fmul float %55, %153 %164 = fadd float %163, %158 %165 = fmul float %56, %153 %166 = fadd float %165, %160 %167 = fmul float %57, %153 %168 = fadd float %167, %162 %169 = fmul float %74, 0x3F80101020000000 %170 = fadd float %169, -1.000000e+00 %171 = fmul float %75, 0x3F80101020000000 %172 = fadd float %171, -1.000000e+00 %173 = fmul float %73, 0x3F80101020000000 %174 = fadd float %173, -1.000000e+00 %175 = fmul float %80, 0x3F80101020000000 %176 = fadd float %175, -1.000000e+00 %177 = fmul float %81, 0x3F80101020000000 %178 = fadd float %177, -1.000000e+00 %179 = fmul float %82, 0x3F80101020000000 %180 = fadd float %179, -1.000000e+00 %181 = fmul float %83, 0x3F80101020000000 %182 = fadd float %181, -1.000000e+00 %183 = fmul float %170, %180 %184 = fmul float %172, %176 %185 = fmul float %174, %178 %186 = fmul float %178, %172 %187 = fsub float %186, %183 %188 = fmul float %180, %174 %189 = fsub float %188, %184 %190 = fmul float %176, %170 %191 = fsub float %190, %185 %192 = fmul float %182, %187 %193 = fmul float %182, %189 %194 = fmul float %182, %191 %195 = fmul float %178, %194 %196 = fmul float %180, %192 %197 = fmul float %176, %193 %198 = fmul float %193, %180 %199 = fsub float %198, %195 %200 = fmul float %194, %176 %201 = fsub float %200, %196 %202 = fmul float %192, %178 %203 = fsub float %202, %197 %204 = fmul float %182, %199 %205 = fmul float %182, %201 %206 = fmul float %182, %203 %207 = fmul float %204, %164 %208 = fmul float %205, %166 %209 = fadd float %208, %207 %210 = fmul float %206, %168 %211 = fadd float %209, %210 %212 = fmul float %192, %164 %213 = fmul float %193, %166 %214 = fadd float %213, %212 %215 = fmul float %194, %168 %216 = fadd float %214, %215 %217 = fmul float %176, %164 %218 = fmul float %178, %166 %219 = fadd float %218, %217 %220 = fmul float %180, %168 %221 = fadd float %219, %220 %222 = fmul float %59, %52 %223 = fmul float %59, %53 %224 = fmul float %59, %54 %225 = fmul float %49, %58 %226 = fadd float %225, %222 %227 = fmul float %50, %58 %228 = fadd float %227, %223 %229 = fmul float %51, %58 %230 = fadd float %229, %224 %231 = fmul float %55, %60 %232 = fadd float %231, %226 %233 = fmul float %56, %60 %234 = fadd float %233, %228 %235 = fmul float %57, %60 %236 = fadd float %235, %230 %237 = fmul float %204, %232 %238 = fmul float %205, %234 %239 = fadd float %238, %237 %240 = fmul float %206, %236 %241 = fadd float %239, %240 %242 = fmul float %192, %232 %243 = fmul float %193, %234 %244 = fadd float %243, %242 %245 = fmul float %194, %236 %246 = fadd float %244, %245 %247 = fmul float %176, %232 %248 = fmul float %178, %234 %249 = fadd float %248, %247 %250 = fmul float %180, %236 %251 = fadd float %249, %250 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float %90, float %91) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %241, float %246, float %251, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %211, float %216, float %221, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %113, float %115, float %117, float %119) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %141, float %143, float %145, float %147) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0x3c008081 ; 7E0202FF 3C008081 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[12:15], 0x1d ; C20A0D1D buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_buffer_load_dword s7, s[12:15], 0x1e ; C2038D1E s_buffer_load_dword s21, s[12:15], 0x1f ; C20A8D1F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00 s_buffer_load_dword s8, s[12:15], 0x20 ; C2040D20 s_buffer_load_dword s9, s[12:15], 0x21 ; C2048D21 s_buffer_load_dword s10, s[12:15], 0x18 ; C2050D18 s_buffer_load_dword s11, s[12:15], 0x19 ; C2058D19 s_buffer_load_dword s16, s[12:15], 0x1a ; C2080D1A s_buffer_load_dword s17, s[12:15], 0x1b ; C2088D1B s_buffer_load_dword s18, s[12:15], 0x1c ; C2090D1C s_buffer_load_dword s19, s[12:15], 0x22 ; C2098D22 s_buffer_load_dword s22, s[12:15], 0x23 ; C20B0D23 s_buffer_load_dword s23, s[12:15], 0x24 ; C20B8D24 s_buffer_load_dword s24, s[12:15], 0x25 ; C20C0D25 s_buffer_load_dword s25, s[12:15], 0x26 ; C20C8D26 s_buffer_load_dword s0, s[12:15], 0xf ; C2000D0F s_buffer_load_dword s26, s[12:15], 0x10 ; C20D0D10 s_buffer_load_dword s27, s[12:15], 0x11 ; C20D8D11 s_buffer_load_dword s6, s[12:15], 0x12 ; C2030D12 s_buffer_load_dword s5, s[12:15], 0x13 ; C2028D13 s_buffer_load_dword s28, s[12:15], 0x27 ; C20E0D27 s_buffer_load_dword s29, s[12:15], 0x28 ; C20E8D28 s_buffer_load_dword s30, s[12:15], 0x29 ; C20F0D29 s_buffer_load_dword s31, s[12:15], 0x2a ; C20F8D2A s_buffer_load_dword s32, s[12:15], 0x2c ; C2100D2C s_buffer_load_dword s33, s[12:15], 0x34 ; C2108D34 s_buffer_load_dword s34, s[12:15], 0x35 ; C2110D35 s_buffer_load_dword s35, s[12:15], 0x36 ; C2118D36 s_buffer_load_dword s36, s[12:15], 0x2d ; C2120D2D s_buffer_load_dword s37, s[12:15], 0x2e ; C2128D2E s_buffer_load_dword s38, s[12:15], 0x30 ; C2130D30 s_buffer_load_dword s39, s[12:15], 0x31 ; C2138D31 s_buffer_load_dword s40, s[12:15], 0x32 ; C2140D32 s_buffer_load_dword s4, s[12:15], 0x0 ; C2020D00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s32 ; 7E000220 s_buffer_load_dword s3, s[12:15], 0x1 ; C2018D01 s_buffer_load_dword s2, s[12:15], 0x2 ; C2010D02 s_buffer_load_dword s1, s[12:15], 0x3 ; C2008D03 v_mul_f32_e32 v17, s18, v3 ; 10220612 v_mad_f32 v17, s10, v2, v17 ; D2820011 0446040A v_mul_f32_e32 v18, s20, v3 ; 10240614 v_mad_f32 v18, s11, v2, v18 ; D2820012 044A040B v_mul_f32_e32 v19, s7, v3 ; 10260607 v_mad_f32 v19, s16, v2, v19 ; D2820013 044E0410 v_mul_f32_e32 v3, s21, v3 ; 10060615 v_mad_f32 v2, s17, v2, v3 ; D2820002 040E0411 v_mad_f32 v3, s8, v4, v17 ; D2820003 04460808 v_mad_f32 v17, s9, v4, v18 ; D2820011 044A0809 v_mad_f32 v18, s19, v4, v19 ; D2820012 044E0813 v_mad_f32 v2, s22, v4, v2 ; D2820002 040A0816 v_mad_f32 v3, s23, v5, v3 ; D2820003 040E0A17 v_mad_f32 v4, s24, v5, v17 ; D2820004 04460A18 v_mad_f32 v17, s25, v5, v18 ; D2820011 044A0A19 v_mad_f32 v2, s28, v5, v2 ; D2820002 040A0A1C v_mad_f32 v5, v7, v1, -1.0 ; D2820005 03CE0307 v_mad_f32 v7, v8, v1, -1.0 ; D2820007 03CE0308 v_mad_f32 v6, v6, v1, -1.0 ; D2820006 03CE0306 v_mad_f32 v8, v9, v1, -1.0 ; D2820008 03CE0309 v_mad_f32 v9, v10, v1, -1.0 ; D2820009 03CE030A v_mad_f32 v10, v11, v1, -1.0 ; D282000A 03CE030B v_mad_f32 v1, v12, v1, -1.0 ; D2820001 03CE030C exp 15, 32, 0, 0, 0, v13, v14, v15, v16 ; F800020F 100F0E0D s_buffer_load_dword s7, s[12:15], 0x4 ; C2038D04 s_buffer_load_dword s8, s[12:15], 0x5 ; C2040D05 s_buffer_load_dword s9, s[12:15], 0x6 ; C2048D06 s_buffer_load_dword s10, s[12:15], 0x7 ; C2050D07 s_buffer_load_dword s11, s[12:15], 0x8 ; C2058D08 s_buffer_load_dword s16, s[12:15], 0x9 ; C2080D09 s_buffer_load_dword s17, s[12:15], 0xa ; C2088D0A s_buffer_load_dword s18, s[12:15], 0xb ; C2090D0B s_buffer_load_dword s19, s[12:15], 0xc ; C2098D0C s_buffer_load_dword s20, s[12:15], 0xd ; C20A0D0D s_buffer_load_dword s12, s[12:15], 0xe ; C2060D0E v_mul_f32_e32 v11, v10, v5 ; 10160B0A v_mad_f32 v11, v9, v7, -v11 ; D282000B 842E0F09 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mad_f32 v7, v10, v6, -v7 ; D2820007 841E0D0A v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mad_f32 v5, v8, v5, -v6 ; D2820005 841A0B08 v_mul_f32_e32 v0, s34, v0 ; 10000022 v_mov_b32_e32 v6, s36 ; 7E0C0224 v_mul_f32_e32 v6, s34, v6 ; 100C0C22 v_mov_b32_e32 v12, s37 ; 7E180225 v_mul_f32_e32 v12, s34, v12 ; 10181822 v_mul_f32_e32 v11, v11, v1 ; 1016030B v_mul_f32_e32 v7, v7, v1 ; 100E0307 v_mul_f32_e32 v5, v5, v1 ; 100A0305 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v13, v5, v9 ; 101A1305 v_mad_f32 v13, v7, v10, -v13 ; D282000D 84361507 v_mul_f32_e32 v14, v11, v10 ; 101C150B v_mad_f32 v14, v5, v8, -v14 ; D282000E 843A1105 v_mul_f32_e32 v15, v7, v8 ; 101E1107 v_mad_f32 v15, v11, v9, -v15 ; D282000F 843E130B v_mul_f32_e32 v13, v13, v1 ; 101A030D v_mul_f32_e32 v14, v14, v1 ; 101C030E v_mul_f32_e32 v1, v15, v1 ; 1002030F v_mov_b32_e32 v15, s33 ; 7E1E0221 v_mad_f32 v0, v15, s29, v0 ; D2820000 04003B0F v_mov_b32_e32 v15, s33 ; 7E1E0221 v_mad_f32 v6, v15, s30, v6 ; D2820006 04183D0F v_mov_b32_e32 v15, s33 ; 7E1E0221 v_mad_f32 v12, v15, s31, v12 ; D282000C 04303F0F v_mov_b32_e32 v15, s35 ; 7E1E0223 v_mad_f32 v0, v15, s38, v0 ; D2820000 04004D0F v_mov_b32_e32 v15, s35 ; 7E1E0223 v_mad_f32 v6, v15, s39, v6 ; D2820006 04184F0F v_mov_b32_e32 v15, s35 ; 7E1E0223 v_mad_f32 v12, v15, s40, v12 ; D282000C 0430510F v_mul_f32_e32 v15, v0, v13 ; 101E1B00 v_mul_f32_e32 v16, v0, v11 ; 10201700 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_mad_f32 v15, v14, v6, v15 ; D282000F 043E0D0E v_mad_f32 v16, v7, v6, v16 ; D2820010 04420D07 v_mad_f32 v0, v9, v6, v0 ; D2820000 04020D09 v_mad_f32 v6, v1, v12, v15 ; D2820006 043E1901 v_mad_f32 v15, v5, v12, v16 ; D282000F 04421905 v_mad_f32 v0, v10, v12, v0 ; D2820000 0402190A v_mov_b32_e32 v12, 0 ; 7E180280 exp 15, 33, 0, 0, 0, v12, v12, v12, v12 ; F800021F 0C0C0C0C exp 15, 34, 0, 0, 0, v6, v15, v0, v12 ; F800022F 0C000F06 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, s27 ; 7E00021B v_mad_f32 v0, -s5, v4, v0 ; D2820000 24020805 v_mul_f32_e32 v6, s32, v0 ; 100C0020 v_mul_f32_e32 v15, s36, v0 ; 101E0024 v_mul_f32_e32 v0, s37, v0 ; 10000025 v_mov_b32_e32 v16, s26 ; 7E20021A v_mad_f32 v16, -s5, v3, v16 ; D2820010 24420605 v_mad_f32 v6, s29, v16, v6 ; D2820006 041A201D v_mad_f32 v15, s30, v16, v15 ; D282000F 043E201E v_mad_f32 v0, s31, v16, v0 ; D2820000 0402201F v_mov_b32_e32 v16, s6 ; 7E200206 v_mad_f32 v16, -s5, v17, v16 ; D2820010 24422205 v_mad_f32 v6, s38, v16, v6 ; D2820006 041A2026 v_mad_f32 v15, s39, v16, v15 ; D282000F 043E2027 v_mad_f32 v0, s40, v16, v0 ; D2820000 04022028 v_mul_f32_e32 v13, v6, v13 ; 101A1B06 v_mul_f32_e32 v11, v6, v11 ; 10161706 v_mul_f32_e32 v6, v6, v8 ; 100C1106 v_mad_f32 v8, v14, v15, v13 ; D2820008 04361F0E v_mad_f32 v7, v7, v15, v11 ; D2820007 042E1F07 v_mad_f32 v6, v9, v15, v6 ; D2820006 041A1F09 v_mad_f32 v1, v1, v0, v8 ; D2820001 04220101 v_mad_f32 v5, v5, v0, v7 ; D2820005 041E0105 v_mad_f32 v0, v10, v0, v6 ; D2820000 041A010A v_mul_f32_e32 v6, s7, v4 ; 100C0807 v_mad_f32 v6, s4, v3, v6 ; D2820006 041A0604 v_mul_f32_e32 v7, s8, v4 ; 100E0808 v_mad_f32 v7, s3, v3, v7 ; D2820007 041E0603 v_mul_f32_e32 v8, s9, v4 ; 10100809 v_mad_f32 v8, s2, v3, v8 ; D2820008 04220602 v_mul_f32_e32 v9, s10, v4 ; 1012080A v_mad_f32 v9, s1, v3, v9 ; D2820009 04260601 v_mad_f32 v6, s11, v17, v6 ; D2820006 041A220B v_mad_f32 v7, s16, v17, v7 ; D2820007 041E2210 v_mad_f32 v8, s17, v17, v8 ; D2820008 04222211 exp 15, 35, 0, 0, 0, v12, v12, v12, v12 ; F800023F 0C0C0C0C v_mov_b32_e32 v10, 1.0 ; 7E1402F2 exp 15, 36, 0, 0, 0, v1, v5, v0, v10 ; F800024F 0A000501 exp 15, 37, 0, 0, 0, v3, v4, v17, v2 ; F800025F 02110403 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, s18, v17, v9 ; D2820000 04262212 v_mad_f32 v1, s19, v2, v6 ; D2820001 041A0413 v_mad_f32 v3, s20, v2, v7 ; D2820003 041E0414 v_mad_f32 v4, s12, v2, v8 ; D2820004 0422040C v_mad_f32 v0, s0, v2, v0 ; D2820000 04020400 exp 15, 12, 0, 1, 0, v1, v3, v4, v0 ; F80008CF 00040301 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 1036 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[4], PERSPECTIVE DCL IN[1], TEXCOORD[6], PERSPECTIVE DCL IN[2], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..5] IMM[0] FLT32 { 0.0000, 2.0000, -1.0000, 1.0000} IMM[1] FLT32 { -0.0000, 15.0000, 0.9151, 0.0000} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[1], IN[1] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[1], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[0], IN[0] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[0], TEMP[0].xxxx 8: MAD TEMP[1].xyz, TEMP[1].zzzz, IMM[0].xxyw, -TEMP[1] 9: UIF CONST[240].xxxx :0 10: MUL TEMP[3].xyz, CONST[7].xyww, IN[2].yyyy 11: MAD TEMP[3].xyz, CONST[6].xyww, IN[2].xxxx, TEMP[3] 12: MAD TEMP[3].xyz, CONST[8].xyww, IN[2].zzzz, TEMP[3] 13: MAD TEMP[3].xyz, CONST[9].xyww, IN[2].wwww, TEMP[3] 14: RCP TEMP[1].w, TEMP[3].zzzz 15: MUL TEMP[3].xy, TEMP[1].wwww, TEMP[3] 16: MAD TEMP[3].xy, TEMP[3], CONST[1], CONST[1].wzzw 17: TEX TEMP[3], TEMP[3], SAMP[0], 2D 18: UIF CONST[240].yyyy :0 19: DP3 TEMP[1].w, IN[2], IN[2] 20: RSQ TEMP[0], |TEMP[1].wwww| 21: MIN TEMP[1].w, IMM[2].xxxx, TEMP[0] 22: RCP TEMP[1].w, TEMP[1].wwww 23: ADD TEMP[1].w, -TEMP[1].wwww, CONST[0].xxxx 24: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[0].yyyy 25: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 26: ADD TEMP[2].w, TEMP[3].wwww, IMM[0].zzzz 27: MAD TEMP[4].xyz, TEMP[1].wwww, TEMP[2].wwww, IMM[0].wwww 28: ELSE :30 29: MOV TEMP[4].xyz, IMM[0].wwww 30: ENDIF 31: MUL TEMP[3].xyz, TEMP[3], TEMP[4] 32: ELSE :34 33: MOV TEMP[3].xyz, IMM[0].wwww 34: ENDIF 35: MOV TEMP[1].w, IMM[0].wwww 36: ADD TEMP[4].xyz, TEMP[1].wwww, -CONST[10] 37: MUL TEMP[5].xyz, CONST[11], CONST[11].wwww 38: MUL TEMP[4].xyz, TEMP[4], TEMP[5] 39: MAD TEMP[4].xyz, TEMP[4], CONST[4].wwww, CONST[4] 40: MOV_SAT TEMP[1].w, TEMP[2].zzzz 41: ADD TEMP[2].w, TEMP[1].wwww, IMM[1].xxxx 42: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 43: ADD TEMP[1].y, TEMP[1].xxxx, IMM[1].xxxx 44: POW TEMP[2].x, |TEMP[1].xxxx|, IMM[1].yyyy 45: MUL TEMP[1].x, TEMP[2].xxxx, IMM[1].zzzz 46: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[4] 47: CMP TEMP[2].xyz, TEMP[2].wwww, IMM[0].xxxx, TEMP[2] 48: MUL TEMP[1].xzw, TEMP[1].xxxx, CONST[5].xyyz 49: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[0].xxxx, TEMP[1].xzww 50: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 51: MUL TEMP[1].xyz, TEMP[3], TEMP[1] 52: MUL OUT[0].xyz, TEMP[1], CONST[12] 53: MOV OUT[0].w, IMM[0].xxxx 54: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %44 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %54 = fmul float %47, %47 %55 = fmul float %48, %48 %56 = fadd float %55, %54 %57 = fmul float %49, %49 %58 = fadd float %56, %57 %59 = call float @llvm.AMDGPU.rsq.clamped.f32(float %58) %60 = call float @llvm.minnum.f32(float %59, float 0x47EFFFFFE0000000) %61 = fmul float %47, %60 %62 = fmul float %48, %60 %63 = fmul float %49, %60 %64 = fmul float %44, %44 %65 = fmul float %45, %45 %66 = fadd float %65, %64 %67 = fmul float %46, %46 %68 = fadd float %66, %67 %69 = call float @llvm.AMDGPU.rsq.clamped.f32(float %68) %70 = call float @llvm.minnum.f32(float %69, float 0x47EFFFFFE0000000) %71 = fmul float %44, %70 %72 = fmul float %45, %70 %73 = fmul float %46, %70 %74 = fmul float %63, 0.000000e+00 %75 = fsub float %74, %61 %76 = fmul float %63, 0.000000e+00 %77 = fsub float %76, %62 %78 = fmul float %63, 2.000000e+00 %79 = fsub float %78, %63 %80 = bitcast float %43 to i32 %81 = icmp eq i32 %80, 0 br i1 %81, label %ENDIF, label %IF IF: ; preds = %main_body %82 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %85 = load <32 x i8>, <32 x i8> addrspace(2)* %84, align 32, !tbaa !0 %86 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %87 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %88 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %89 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %90 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %91 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %92 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %93 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %94 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %95 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %96 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %97 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %98 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %99 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %100 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %101 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %102 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %103 = fmul float %95, %51 %104 = fmul float %94, %51 %105 = fmul float %93, %51 %106 = fmul float %98, %50 %107 = fadd float %106, %103 %108 = fmul float %97, %50 %109 = fadd float %108, %104 %110 = fmul float %96, %50 %111 = fadd float %110, %105 %112 = fmul float %92, %52 %113 = fadd float %112, %107 %114 = fmul float %91, %52 %115 = fadd float %114, %109 %116 = fmul float %90, %52 %117 = fadd float %116, %111 %118 = fmul float %89, %53 %119 = fadd float %118, %113 %120 = fmul float %88, %53 %121 = fadd float %120, %115 %122 = fmul float %87, %53 %123 = fadd float %122, %117 %124 = fdiv float 1.000000e+00, %123 %125 = fmul float %124, %119 %126 = fmul float %124, %121 %127 = fmul float %125, %102 %128 = fadd float %127, %99 %129 = fmul float %126, %101 %130 = fadd float %129, %100 %131 = bitcast float %128 to i32 %132 = bitcast float %130 to i32 %133 = insertelement <2 x i32> undef, i32 %131, i32 0 %134 = insertelement <2 x i32> %133, i32 %132, i32 1 %135 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %134, <32 x i8> %85, <16 x i8> %83, i32 2) %136 = extractelement <4 x float> %135, i32 0 %137 = extractelement <4 x float> %135, i32 1 %138 = extractelement <4 x float> %135, i32 2 %139 = bitcast float %86 to i32 %140 = icmp eq i32 %139, 0 br i1 %140, label %ENDIF24, label %IF25 ENDIF: ; preds = %main_body, %ENDIF24 %temp12.0 = phi float [ %214, %ENDIF24 ], [ 1.000000e+00, %main_body ] %temp13.0 = phi float [ %215, %ENDIF24 ], [ 1.000000e+00, %main_body ] %temp14.0 = phi float [ %216, %ENDIF24 ], [ 1.000000e+00, %main_body ] %141 = fsub float 1.000000e+00, %33 %142 = fsub float 1.000000e+00, %34 %143 = fsub float 1.000000e+00, %35 %144 = fmul float %36, %39 %145 = fmul float %37, %39 %146 = fmul float %38, %39 %147 = fmul float %141, %144 %148 = fmul float %142, %145 %149 = fmul float %143, %146 %150 = fmul float %147, %29 %151 = fadd float %150, %26 %152 = fmul float %148, %29 %153 = fadd float %152, %27 %154 = fmul float %149, %29 %155 = fadd float %154, %28 %156 = call float @llvm.AMDIL.clamp.(float %73, float 0.000000e+00, float 1.000000e+00) %157 = fadd float %156, 0xBEB0C6F7A0000000 %158 = fmul float %75, %71 %159 = fmul float %77, %72 %160 = fadd float %159, %158 %161 = fmul float %79, %73 %162 = fadd float %160, %161 %163 = call float @llvm.AMDIL.clamp.(float %162, float 0.000000e+00, float 1.000000e+00) %164 = fadd float %163, 0xBEB0C6F7A0000000 %165 = call float @fabs(float %163) %166 = call float @llvm.pow.f32(float %165, float 1.500000e+01) %167 = fmul float %166, 0x3FED48D5A0000000 %168 = fmul float %156, %151 %169 = fmul float %156, %153 %170 = fmul float %156, %155 %171 = call float @llvm.AMDGPU.cndlt(float %157, float 0.000000e+00, float %168) %172 = call float @llvm.AMDGPU.cndlt(float %157, float 0.000000e+00, float %169) %173 = call float @llvm.AMDGPU.cndlt(float %157, float 0.000000e+00, float %170) %174 = fmul float %167, %30 %175 = fmul float %167, %31 %176 = fmul float %167, %32 %177 = call float @llvm.AMDGPU.cndlt(float %164, float 0.000000e+00, float %174) %178 = call float @llvm.AMDGPU.cndlt(float %164, float 0.000000e+00, float %175) %179 = call float @llvm.AMDGPU.cndlt(float %164, float 0.000000e+00, float %176) %180 = fadd float %177, %171 %181 = fadd float %178, %172 %182 = fadd float %179, %173 %183 = fmul float %temp12.0, %180 %184 = fmul float %temp13.0, %181 %185 = fmul float %temp14.0, %182 %186 = fmul float %183, %40 %187 = fmul float %184, %41 %188 = fmul float %185, %42 %189 = call i32 @llvm.SI.packf16(float %186, float %187) %190 = bitcast i32 %189 to float %191 = call i32 @llvm.SI.packf16(float %188, float 0.000000e+00) %192 = bitcast i32 %191 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %190, float %192, float %190, float %192) ret void IF25: ; preds = %IF %193 = extractelement <4 x float> %135, i32 3 %194 = fmul float %50, %50 %195 = fmul float %51, %51 %196 = fadd float %195, %194 %197 = fmul float %52, %52 %198 = fadd float %196, %197 %199 = call float @fabs(float %198) %200 = call float @llvm.AMDGPU.rsq.clamped.f32(float %199) %201 = call float @llvm.minnum.f32(float %200, float 0x47EFFFFFE0000000) %202 = fdiv float 1.000000e+00, %201 %203 = fsub float %24, %202 %204 = fmul float %203, %25 %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00) %206 = fmul float %205, %205 %207 = fadd float %193, -1.000000e+00 %208 = fmul float %206, %207 %209 = fadd float %208, 1.000000e+00 %210 = fmul float %206, %207 %211 = fadd float %210, 1.000000e+00 %212 = fmul float %206, %207 %213 = fadd float %212, 1.000000e+00 br label %ENDIF24 ENDIF24: ; preds = %IF, %IF25 %temp16.0 = phi float [ %209, %IF25 ], [ 1.000000e+00, %IF ] %temp17.0 = phi float [ %211, %IF25 ], [ 1.000000e+00, %IF ] %temp18.0 = phi float [ %213, %IF25 ], [ 1.000000e+00, %IF ] %214 = fmul float %136, %temp16.0 %215 = fmul float %137, %temp17.0 %216 = fmul float %138, %temp18.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 s_movk_i32 s8, 0xf00 ; B0080F00 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s10, s[0:3], s8 ; C2050008 v_mul_f32_e32 v7, v8, v8 ; 100E1108 v_mad_f32 v7, v5, v5, v7 ; D2820007 041E0B05 v_mad_f32 v7, v6, v6, v7 ; D2820007 041E0D06 s_buffer_load_dword s9, s[0:3], 0x13 ; C2048113 s_buffer_load_dword s8, s[0:3], 0x2f ; C204012F v_rsq_clamp_f32_e32 v9, v7 ; 7E125907 v_mul_f32_e32 v7, v2, v2 ; 100E0502 v_mad_f32 v7, v3, v3, v7 ; D2820007 041E0703 v_mad_f32 v7, v4, v4, v7 ; D2820007 041E0904 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[10:11], 0, s10 ; D10A000A 00001480 v_mov_b32_e32 v10, 1.0 ; 7E1402F2 v_mov_b32_e32 v11, 1.0 ; 7E1602F2 v_mov_b32_e32 v12, 1.0 ; 7E1802F2 s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_cbranch_execz BB0_4 ; BF880000 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00 v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_buffer_load_dword s13, s[0:3], 0x1b ; C206811B s_buffer_load_dword s14, s[0:3], 0x1c ; C207011C s_buffer_load_dword s15, s[0:3], 0x1d ; C207811D s_buffer_load_dword s16, s[0:3], 0x4 ; C2080104 s_buffer_load_dword s17, s[0:3], 0x5 ; C2088105 s_buffer_load_dword s18, s[0:3], 0x6 ; C2090106 s_buffer_load_dword s19, s[0:3], 0x7 ; C2098107 s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118 v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 s_buffer_load_dword s21, s[0:3], 0x27 ; C20A8127 s_buffer_load_dword s22, s[0:3], 0x1f ; C20B011F s_buffer_load_dword s23, s[0:3], 0x20 ; C20B8120 s_buffer_load_dword s24, s[0:3], 0x21 ; C20C0121 s_buffer_load_dword s25, s[0:3], 0x23 ; C20C8123 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s14, v11 ; 1002160E v_mul_f32_e32 v13, s15, v11 ; 101A160F v_mad_f32 v1, s20, v10, v1 ; D2820001 04061414 v_mad_f32 v13, s12, v10, v13 ; D282000D 0436140C s_buffer_load_dword s12, s[0:3], 0x24 ; C2060124 s_buffer_load_dword s14, s[0:3], 0x25 ; C2070125 v_mul_f32_e32 v14, s22, v11 ; 101C1616 v_mad_f32 v14, s13, v10, v14 ; D282000E 043A140D v_mad_f32 v1, s23, v12, v1 ; D2820001 04061817 v_mad_f32 v14, s25, v12, v14 ; D282000E 043A1819 v_mad_f32 v14, s21, v0, v14 ; D282000E 043A0015 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_movk_i32 s13, 0xf04 ; B00D0F04 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_buffer_load_dword s13, s[0:3], s13 ; C206800D v_rcp_f32_e32 v14, v14 ; 7E1C550E v_mad_f32 v13, s24, v12, v13 ; D282000D 04361818 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s12, v0, v1 ; D2820001 0406000C v_mad_f32 v0, s14, v0, v13 ; D2820000 0436000E v_mul_f32_e32 v1, v1, v14 ; 10021D01 v_mul_f32_e32 v0, v0, v14 ; 10001D00 v_mov_b32_e32 v13, s19 ; 7E1A0213 v_mad_f32 v13, s16, v1, v13 ; D282000D 04360210 v_mov_b32_e32 v1, s18 ; 7E020212 v_mad_f32 v14, s17, v0, v1 ; D282000E 04060011 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[28:35], s[20:23] ; F0800F00 00A70D0D v_cmp_ne_i32_e64 s[12:13], 0, s13 ; D10A000C 00001A80 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_buffer_load_dword s15, s[0:3], 0x1 ; C2078101 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v10, v10 ; 1000150A v_mad_f32 v0, v11, v11, v0 ; D2820000 0402170B v_mad_f32 v0, v12, v12, v0 ; D2820000 0402190C v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_add_f32_e32 v1, -1.0, v16 ; 060220F3 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s14, v0 ; 0800000E v_mul_f32_e32 v0, s15, v0 ; 1000000F v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v0, v0, v1, 1.0 ; D2820000 03CA0300 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mul_f32_e32 v12, v0, v13 ; 10181B00 v_mul_f32_e32 v11, v0, v14 ; 10161D00 v_mul_f32_e32 v10, v0, v15 ; 10141F00 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s11, s[0:3], 0x12 ; C2058112 s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 v_min_f32_e32 v0, 0x7f7fffff, v9 ; 1E0012FF 7F7FFFFF v_mul_f32_e32 v1, v0, v8 ; 10021100 s_buffer_load_dword s7, s[0:3], 0x15 ; C2038115 s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116 s_buffer_load_dword s15, s[0:3], 0x28 ; C2078128 s_buffer_load_dword s16, s[0:3], 0x29 ; C2080129 s_buffer_load_dword s14, s[0:3], 0x2a ; C207012A s_buffer_load_dword s17, s[0:3], 0x2c ; C208812C s_buffer_load_dword s18, s[0:3], 0x2d ; C209012D s_buffer_load_dword s19, s[0:3], 0x2e ; C209812E s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_buffer_load_dword s5, s[0:3], 0x31 ; C2028131 s_buffer_load_dword s0, s[0:3], 0x32 ; C2000132 v_mul_f32_e32 v8, v0, v6 ; 10100D00 v_mad_f32 v9, v6, v0, v8 ; D2820009 04220106 v_mad_f32 v6, -v6, v0, v9 ; D2820006 24260106 v_mul_f32_e32 v9, v0, v5 ; 10120B00 v_min_f32_e32 v0, 0x7f7fffff, v7 ; 1E000EFF 7F7FFFFF v_mul_f32_e32 v5, v0, v2 ; 100A0500 v_mul_f32_e32 v3, v0, v3 ; 10060700 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mad_f32 v4, 0, v8, -v1 ; D2820004 84061080 v_mad_f32 v7, 0, v8, -v9 ; D2820007 84261080 v_mov_b32_e32 v1, s9 ; 7E020209 v_mov_b32_e32 v2, s8 ; 7E040208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, v5, v4 ; 10080905 v_mad_f32 v3, v7, v3, v4 ; D2820003 04120707 v_mad_f32 v3, v6, v0, v3 ; D2820003 040E0106 v_sub_f32_e64 v4, 1.0, s15 ; D2080004 00001EF2 v_sub_f32_e64 v5, 1.0, s16 ; D2080005 000020F2 v_mul_f32_e32 v6, s17, v2 ; 100C0411 v_mul_f32_e32 v7, s18, v2 ; 100E0412 v_mul_f32_e32 v2, s19, v2 ; 10040413 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_sub_f32_e64 v6, 1.0, s14 ; D2080006 00001CF2 v_mul_f32_e32 v2, v2, v6 ; 10040D02 v_mad_f32 v4, v1, v4, s12 ; D2820004 00320901 v_mad_f32 v5, v5, v1, s13 ; D2820005 00360305 v_mad_f32 v1, v2, v1, s11 ; D2820001 002E0302 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e64 v2, 0, v3 clamp ; D2060802 00020680 v_and_b32_e32 v3, 0x7fffffff, v2 ; 360604FF 7FFFFFFF v_log_f32_e32 v3, v3 ; 7E064F03 v_mov_b32_e32 v6, 0xb58637bd ; 7E0C02FF B58637BD v_add_f32_e32 v7, v6, v0 ; 060E0106 v_add_f32_e32 v2, v6, v2 ; 06040506 v_mul_legacy_f32_e32 v3, 0x41700000, v3 ; 0E0606FF 41700000 v_exp_f32_e32 v3, v3 ; 7E064B03 v_mul_f32_e32 v3, 0x3f6a46ad, v3 ; 100606FF 3F6A46AD v_mul_f32_e32 v4, v4, v0 ; 10080104 v_mul_f32_e32 v5, v5, v0 ; 100A0105 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v1, v4, 0, vcc ; D2000001 01A90104 v_cndmask_b32_e64 v4, v5, 0, vcc ; D2000004 01A90105 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v5, s6, v3 ; 100A0606 v_mul_f32_e32 v6, s7, v3 ; 100C0607 v_mul_f32_e32 v3, s10, v3 ; 1006060A v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, v5, 0, vcc ; D2000002 01A90105 v_cndmask_b32_e64 v5, v6, 0, vcc ; D2000005 01A90106 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_add_f32_e32 v1, v1, v2 ; 06020501 v_add_f32_e32 v2, v4, v5 ; 06040B04 v_add_f32_e32 v0, v0, v3 ; 06000700 v_mul_f32_e32 v1, v1, v12 ; 10021901 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 20 Code Size: 988 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL IN[3], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..5] IMM[0] FLT32 { 0.0000, 2.0000, -1.0000, 1.0000} IMM[1] FLT32 { -0.0000, 8.0000, 0.0398, 0.0000} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[2], IN[2] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[2], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: MAD TEMP[1].xyz, TEMP[1].zzzz, IMM[0].xxyw, -TEMP[1] 9: UIF CONST[240].xxxx :0 10: MUL TEMP[3].xyz, CONST[7].xyww, IN[3].yyyy 11: MAD TEMP[3].xyz, CONST[6].xyww, IN[3].xxxx, TEMP[3] 12: MAD TEMP[3].xyz, CONST[8].xyww, IN[3].zzzz, TEMP[3] 13: MAD TEMP[3].xyz, CONST[9].xyww, IN[3].wwww, TEMP[3] 14: RCP TEMP[1].w, TEMP[3].zzzz 15: MUL TEMP[3].xy, TEMP[1].wwww, TEMP[3] 16: MAD TEMP[3].xy, TEMP[3], CONST[1], CONST[1].wzzw 17: TEX TEMP[3], TEMP[3], SAMP[0], 2D 18: UIF CONST[240].yyyy :0 19: DP3 TEMP[1].w, IN[3], IN[3] 20: RSQ TEMP[0], |TEMP[1].wwww| 21: MIN TEMP[1].w, IMM[2].xxxx, TEMP[0] 22: RCP TEMP[1].w, TEMP[1].wwww 23: ADD TEMP[1].w, -TEMP[1].wwww, CONST[0].xxxx 24: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[0].yyyy 25: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 26: ADD TEMP[2].w, TEMP[3].wwww, IMM[0].zzzz 27: MAD TEMP[4].xyz, TEMP[1].wwww, TEMP[2].wwww, IMM[0].wwww 28: ELSE :30 29: MOV TEMP[4].xyz, IMM[0].wwww 30: ENDIF 31: MUL TEMP[3].xyz, TEMP[3], TEMP[4] 32: ELSE :34 33: MOV TEMP[3].xyz, IMM[0].wwww 34: ENDIF 35: MOV TEMP[1].w, IMM[0].wwww 36: ADD TEMP[4].xyz, TEMP[1].wwww, -CONST[10] 37: TEX TEMP[5], IN[0], SAMP[1], 2D 38: MUL TEMP[4].xyz, TEMP[4], TEMP[5] 39: MAD TEMP[4].xyz, TEMP[4], CONST[4].wwww, CONST[4] 40: TEX TEMP[5], IN[0], SAMP[2], 2D 41: MAD TEMP[5].xyz, TEMP[5], CONST[5].wwww, CONST[5] 42: MOV_SAT TEMP[1].w, TEMP[2].zzzz 43: ADD TEMP[2].w, TEMP[1].wwww, IMM[1].xxxx 44: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 45: ADD TEMP[1].y, TEMP[1].xxxx, IMM[1].xxxx 46: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[11].xxxx 47: MOV TEMP[2].y, IMM[1].yyyy 48: ADD TEMP[1].x, TEMP[2].yyyy, CONST[11].xxxx 49: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 50: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 51: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[4] 52: CMP TEMP[2].xyz, TEMP[2].wwww, IMM[0].xxxx, TEMP[2] 53: MUL TEMP[1].xzw, TEMP[5].xyyz, TEMP[1].xxxx 54: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[0].xxxx, TEMP[1].xzww 55: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 56: MUL TEMP[1].xyz, TEMP[3], TEMP[1] 57: MUL OUT[0].xyz, TEMP[1], CONST[12] 58: MOV OUT[0].w, IMM[0].xxxx 59: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %49 = bitcast <8 x i32> addrspace(2)* %48 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %52 = bitcast <4 x i32> addrspace(2)* %51 to <16 x i8> addrspace(2)* %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %66 = fmul float %59, %59 %67 = fmul float %60, %60 %68 = fadd float %67, %66 %69 = fmul float %61, %61 %70 = fadd float %68, %69 %71 = call float @llvm.AMDGPU.rsq.clamped.f32(float %70) %72 = call float @llvm.minnum.f32(float %71, float 0x47EFFFFFE0000000) %73 = fmul float %59, %72 %74 = fmul float %60, %72 %75 = fmul float %61, %72 %76 = fmul float %56, %56 %77 = fmul float %57, %57 %78 = fadd float %77, %76 %79 = fmul float %58, %58 %80 = fadd float %78, %79 %81 = call float @llvm.AMDGPU.rsq.clamped.f32(float %80) %82 = call float @llvm.minnum.f32(float %81, float 0x47EFFFFFE0000000) %83 = fmul float %56, %82 %84 = fmul float %57, %82 %85 = fmul float %58, %82 %86 = fmul float %75, 0.000000e+00 %87 = fsub float %86, %73 %88 = fmul float %75, 0.000000e+00 %89 = fsub float %88, %74 %90 = fmul float %75, 2.000000e+00 %91 = fsub float %90, %75 %92 = bitcast float %41 to i32 %93 = icmp eq i32 %92, 0 br i1 %93, label %ENDIF, label %IF IF: ; preds = %main_body %94 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 %96 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %97 = load <32 x i8>, <32 x i8> addrspace(2)* %96, align 32, !tbaa !0 %98 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %99 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %100 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %101 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %102 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %103 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %104 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %105 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %106 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %107 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %108 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %109 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %110 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %111 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %112 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %113 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %114 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %115 = fmul float %107, %63 %116 = fmul float %106, %63 %117 = fmul float %105, %63 %118 = fmul float %110, %62 %119 = fadd float %118, %115 %120 = fmul float %109, %62 %121 = fadd float %120, %116 %122 = fmul float %108, %62 %123 = fadd float %122, %117 %124 = fmul float %104, %64 %125 = fadd float %124, %119 %126 = fmul float %103, %64 %127 = fadd float %126, %121 %128 = fmul float %102, %64 %129 = fadd float %128, %123 %130 = fmul float %101, %65 %131 = fadd float %130, %125 %132 = fmul float %100, %65 %133 = fadd float %132, %127 %134 = fmul float %99, %65 %135 = fadd float %134, %129 %136 = fdiv float 1.000000e+00, %135 %137 = fmul float %136, %131 %138 = fmul float %136, %133 %139 = fmul float %137, %114 %140 = fadd float %139, %111 %141 = fmul float %138, %113 %142 = fadd float %141, %112 %143 = bitcast float %140 to i32 %144 = bitcast float %142 to i32 %145 = insertelement <2 x i32> undef, i32 %143, i32 0 %146 = insertelement <2 x i32> %145, i32 %144, i32 1 %147 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %97, <16 x i8> %95, i32 2) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 1 %150 = extractelement <4 x float> %147, i32 2 %151 = bitcast float %98 to i32 %152 = icmp eq i32 %151, 0 br i1 %152, label %ENDIF24, label %IF25 ENDIF: ; preds = %main_body, %ENDIF24 %temp12.0 = phi float [ %247, %ENDIF24 ], [ 1.000000e+00, %main_body ] %temp13.0 = phi float [ %248, %ENDIF24 ], [ 1.000000e+00, %main_body ] %temp14.0 = phi float [ %249, %ENDIF24 ], [ 1.000000e+00, %main_body ] %153 = fsub float 1.000000e+00, %34 %154 = fsub float 1.000000e+00, %35 %155 = fsub float 1.000000e+00, %36 %156 = bitcast float %54 to i32 %157 = bitcast float %55 to i32 %158 = insertelement <2 x i32> undef, i32 %156, i32 0 %159 = insertelement <2 x i32> %158, i32 %157, i32 1 %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %44, <16 x i8> %47, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = extractelement <4 x float> %160, i32 1 %163 = extractelement <4 x float> %160, i32 2 %164 = fmul float %153, %161 %165 = fmul float %154, %162 %166 = fmul float %155, %163 %167 = fmul float %164, %29 %168 = fadd float %167, %26 %169 = fmul float %165, %29 %170 = fadd float %169, %27 %171 = fmul float %166, %29 %172 = fadd float %171, %28 %173 = bitcast float %54 to i32 %174 = bitcast float %55 to i32 %175 = insertelement <2 x i32> undef, i32 %173, i32 0 %176 = insertelement <2 x i32> %175, i32 %174, i32 1 %177 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %176, <32 x i8> %50, <16 x i8> %53, i32 2) %178 = extractelement <4 x float> %177, i32 0 %179 = extractelement <4 x float> %177, i32 1 %180 = extractelement <4 x float> %177, i32 2 %181 = fmul float %178, %33 %182 = fadd float %181, %30 %183 = fmul float %179, %33 %184 = fadd float %183, %31 %185 = fmul float %180, %33 %186 = fadd float %185, %32 %187 = call float @llvm.AMDIL.clamp.(float %85, float 0.000000e+00, float 1.000000e+00) %188 = fadd float %187, 0xBEB0C6F7A0000000 %189 = fmul float %87, %83 %190 = fmul float %89, %84 %191 = fadd float %190, %189 %192 = fmul float %91, %85 %193 = fadd float %191, %192 %194 = call float @llvm.AMDIL.clamp.(float %193, float 0.000000e+00, float 1.000000e+00) %195 = fadd float %194, 0xBEB0C6F7A0000000 %196 = call float @fabs(float %194) %197 = call float @llvm.pow.f32(float %196, float %37) %198 = fadd float %37, 8.000000e+00 %199 = fmul float %198, %197 %200 = fmul float %199, 0x3FA45F3060000000 %201 = fmul float %187, %168 %202 = fmul float %187, %170 %203 = fmul float %187, %172 %204 = call float @llvm.AMDGPU.cndlt(float %188, float 0.000000e+00, float %201) %205 = call float @llvm.AMDGPU.cndlt(float %188, float 0.000000e+00, float %202) %206 = call float @llvm.AMDGPU.cndlt(float %188, float 0.000000e+00, float %203) %207 = fmul float %182, %200 %208 = fmul float %184, %200 %209 = fmul float %186, %200 %210 = call float @llvm.AMDGPU.cndlt(float %195, float 0.000000e+00, float %207) %211 = call float @llvm.AMDGPU.cndlt(float %195, float 0.000000e+00, float %208) %212 = call float @llvm.AMDGPU.cndlt(float %195, float 0.000000e+00, float %209) %213 = fadd float %210, %204 %214 = fadd float %211, %205 %215 = fadd float %212, %206 %216 = fmul float %temp12.0, %213 %217 = fmul float %temp13.0, %214 %218 = fmul float %temp14.0, %215 %219 = fmul float %216, %38 %220 = fmul float %217, %39 %221 = fmul float %218, %40 %222 = call i32 @llvm.SI.packf16(float %219, float %220) %223 = bitcast i32 %222 to float %224 = call i32 @llvm.SI.packf16(float %221, float 0.000000e+00) %225 = bitcast i32 %224 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %223, float %225, float %223, float %225) ret void IF25: ; preds = %IF %226 = extractelement <4 x float> %147, i32 3 %227 = fmul float %62, %62 %228 = fmul float %63, %63 %229 = fadd float %228, %227 %230 = fmul float %64, %64 %231 = fadd float %229, %230 %232 = call float @fabs(float %231) %233 = call float @llvm.AMDGPU.rsq.clamped.f32(float %232) %234 = call float @llvm.minnum.f32(float %233, float 0x47EFFFFFE0000000) %235 = fdiv float 1.000000e+00, %234 %236 = fsub float %24, %235 %237 = fmul float %236, %25 %238 = call float @llvm.AMDIL.clamp.(float %237, float 0.000000e+00, float 1.000000e+00) %239 = fmul float %238, %238 %240 = fadd float %226, -1.000000e+00 %241 = fmul float %239, %240 %242 = fadd float %241, 1.000000e+00 %243 = fmul float %239, %240 %244 = fadd float %243, 1.000000e+00 %245 = fmul float %239, %240 %246 = fadd float %245, 1.000000e+00 br label %ENDIF24 ENDIF24: ; preds = %IF, %IF25 %temp16.0 = phi float [ %242, %IF25 ], [ 1.000000e+00, %IF ] %temp17.0 = phi float [ %244, %IF25 ], [ 1.000000e+00, %IF ] %temp18.0 = phi float [ %246, %IF25 ], [ 1.000000e+00, %IF ] %247 = fmul float %148, %temp16.0 %248 = fmul float %149, %temp17.0 %249 = fmul float %150, %temp18.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 s_movk_i32 s8, 0xf00 ; B0080F00 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s10, s[0:3], s8 ; C2050008 v_mul_f32_e32 v10, v7, v7 ; 10140F07 v_mad_f32 v10, v8, v8, v10 ; D282000A 042A1108 v_mad_f32 v10, v9, v9, v10 ; D282000A 042A1309 s_buffer_load_dword s9, s[0:3], 0x13 ; C2048113 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 v_rsq_clamp_f32_e32 v11, v10 ; 7E16590A v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mad_f32 v10, v5, v5, v10 ; D282000A 042A0B05 v_mad_f32 v10, v6, v6, v10 ; D282000A 042A0D06 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[10:11], 0, s10 ; D10A000A 00001480 v_mov_b32_e32 v12, 1.0 ; 7E1802F2 v_mov_b32_e32 v13, 1.0 ; 7E1A02F2 v_mov_b32_e32 v14, 1.0 ; 7E1C02F2 s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_cbranch_execz BB0_4 ; BF880000 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00 v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_buffer_load_dword s13, s[0:3], 0x1b ; C206811B s_buffer_load_dword s14, s[0:3], 0x1c ; C207011C s_buffer_load_dword s15, s[0:3], 0x1d ; C207811D s_buffer_load_dword s16, s[0:3], 0x4 ; C2080104 s_buffer_load_dword s17, s[0:3], 0x5 ; C2088105 s_buffer_load_dword s18, s[0:3], 0x6 ; C2090106 s_buffer_load_dword s19, s[0:3], 0x7 ; C2098107 s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 s_buffer_load_dword s21, s[0:3], 0x27 ; C20A8127 s_buffer_load_dword s22, s[0:3], 0x1f ; C20B011F s_buffer_load_dword s23, s[0:3], 0x20 ; C20B8120 s_buffer_load_dword s24, s[0:3], 0x21 ; C20C0121 s_buffer_load_dword s25, s[0:3], 0x23 ; C20C8123 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s14, v13 ; 10021A0E v_mul_f32_e32 v15, s15, v13 ; 101E1A0F v_mad_f32 v1, s20, v12, v1 ; D2820001 04061814 v_mad_f32 v15, s12, v12, v15 ; D282000F 043E180C s_buffer_load_dword s12, s[0:3], 0x24 ; C2060124 s_buffer_load_dword s14, s[0:3], 0x25 ; C2070125 v_mul_f32_e32 v16, s22, v13 ; 10201A16 v_mad_f32 v16, s13, v12, v16 ; D2820010 0442180D v_mad_f32 v1, s23, v14, v1 ; D2820001 04061C17 v_mad_f32 v16, s25, v14, v16 ; D2820010 04421C19 v_mad_f32 v16, s21, v0, v16 ; D2820010 04420015 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_movk_i32 s13, 0xf04 ; B00D0F04 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_buffer_load_dword s13, s[0:3], s13 ; C206800D v_rcp_f32_e32 v16, v16 ; 7E205510 v_mad_f32 v15, s24, v14, v15 ; D282000F 043E1C18 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s12, v0, v1 ; D2820001 0406000C v_mad_f32 v0, s14, v0, v15 ; D2820000 043E000E v_mul_f32_e32 v1, v1, v16 ; 10022101 v_mul_f32_e32 v0, v0, v16 ; 10002100 v_mov_b32_e32 v15, s19 ; 7E1E0213 v_mad_f32 v15, s16, v1, v15 ; D282000F 043E0210 v_mov_b32_e32 v1, s18 ; 7E020212 v_mad_f32 v16, s17, v0, v1 ; D2820010 04060011 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[28:35], s[20:23] ; F0800F00 00A70F0F v_cmp_ne_i32_e64 s[12:13], 0, s13 ; D10A000C 00001A80 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_buffer_load_dword s15, s[0:3], 0x1 ; C2078101 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v12, v12 ; 1000190C v_mad_f32 v0, v13, v13, v0 ; D2820000 04021B0D v_mad_f32 v0, v14, v14, v0 ; D2820000 04021D0E v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_add_f32_e32 v1, -1.0, v18 ; 060224F3 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s14, v0 ; 0800000E v_mul_f32_e32 v0, s15, v0 ; 1000000F v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v0, v0, v1, 1.0 ; D2820000 03CA0300 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mul_f32_e32 v14, v0, v15 ; 101C1F00 v_mul_f32_e32 v13, v0, v16 ; 101A2100 v_mul_f32_e32 v12, v0, v17 ; 10182300 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_min_f32_e32 v0, 0x7f7fffff, v11 ; 1E0016FF 7F7FFFFF s_buffer_load_dword s18, s[0:3], 0x10 ; C2090110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112 s_buffer_load_dword s13, s[0:3], 0x14 ; C2068114 s_buffer_load_dword s14, s[0:3], 0x15 ; C2070115 s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116 s_buffer_load_dword s19, s[0:3], 0x28 ; C2098128 s_buffer_load_dword s20, s[0:3], 0x29 ; C20A0129 s_buffer_load_dword s21, s[0:3], 0x2a ; C20A812A s_buffer_load_dword s12, s[0:3], 0x2c ; C206012C s_buffer_load_dword s10, s[0:3], 0x30 ; C2050130 s_buffer_load_dword s11, s[0:3], 0x31 ; C2058131 s_buffer_load_dword s0, s[0:3], 0x32 ; C2000132 v_mul_f32_e32 v1, v0, v9 ; 10021300 v_mad_f32 v11, v9, v0, v1 ; D282000B 04060109 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx4 s[28:31], s[4:5], 0x8 ; C08E0508 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 v_mad_f32 v9, -v9, v0, v11 ; D2820009 242E0109 v_mul_f32_e32 v11, v0, v7 ; 10160F00 v_mul_f32_e32 v8, v0, v8 ; 10101100 v_min_f32_e32 v0, 0x7f7fffff, v10 ; 1E0014FF 7F7FFFFF v_mul_f32_e32 v7, v0, v4 ; 100E0900 v_mul_f32_e32 v5, v0, v5 ; 100A0B00 v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mad_f32 v6, 0, v1, -v11 ; D2820006 842E0280 v_mad_f32 v8, 0, v1, -v8 ; D2820008 84220280 v_mov_b32_e32 v4, s9 ; 7E080209 v_mov_b32_e32 v1, s8 ; 7E020208 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[24:27] ; F0800700 00C80F02 image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[28:31] ; F0800700 00EA1202 v_mul_f32_e32 v2, v7, v6 ; 10040D07 v_mad_f32 v2, v8, v5, v2 ; D2820002 040A0B08 v_mad_f32 v2, v9, v0, v2 ; D2820002 040A0109 v_sub_f32_e64 v3, 1.0, s19 ; D2080003 000026F2 v_sub_f32_e64 v5, 1.0, s20 ; D2080005 000028F2 v_sub_f32_e64 v6, 1.0, s21 ; D2080006 00002AF2 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v3, v15, v3 ; 1006070F v_mad_f32 v3, v4, v3, s18 ; D2820003 004A0704 v_mul_f32_e32 v5, v16, v5 ; 100A0B10 v_mul_f32_e32 v6, v17, v6 ; 100C0D11 v_mad_f32 v5, v5, v4, s16 ; D2820005 00420905 v_mad_f32 v4, v6, v4, s17 ; D2820004 00460906 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v1, v18, s13 ; D2820006 00362501 v_mad_f32 v7, v19, v1, s14 ; D2820007 003A0313 v_mad_f32 v1, v20, v1, s15 ; D2820001 003E0314 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_and_b32_e32 v8, 0x7fffffff, v2 ; 361004FF 7FFFFFFF v_log_f32_e32 v8, v8 ; 7E104F08 v_mov_b32_e32 v9, 0xb58637bd ; 7E1202FF B58637BD v_add_f32_e32 v10, v9, v0 ; 06140109 v_add_f32_e32 v2, v9, v2 ; 06040509 v_mul_legacy_f32_e32 v8, s12, v8 ; 0E10100C v_exp_f32_e32 v8, v8 ; 7E104B08 v_mov_b32_e32 v9, 0x41000000 ; 7E1202FF 41000000 v_add_f32_e32 v9, s12, v9 ; 0612120C v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mul_f32_e32 v8, 0x3d22f983, v8 ; 101010FF 3D22F983 v_mul_f32_e32 v3, v3, v0 ; 10060103 v_mul_f32_e32 v5, v5, v0 ; 100A0105 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_cndmask_b32_e64 v4, v5, 0, vcc ; D2000004 01A90105 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v5, v8, v6 ; 100A0D08 v_mul_f32_e32 v6, v8, v7 ; 100C0F08 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, v5, 0, vcc ; D2000002 01A90105 v_cndmask_b32_e64 v5, v6, 0, vcc ; D2000005 01A90106 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_add_f32_e32 v2, v3, v2 ; 06040503 v_add_f32_e32 v3, v4, v5 ; 06060B04 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mul_f32_e32 v1, v2, v14 ; 10021D02 v_mul_f32_e32 v2, v3, v13 ; 10041B03 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_mul_f32_e32 v1, s10, v1 ; 1002020A v_mul_f32_e32 v2, s11, v2 ; 1004040B v_mul_f32_e32 v0, s0, v0 ; 10000000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 24 Code Size: 1060 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2].xy, TEXCOORD[0] DCL OUT[3].x, TEXCOORD[4] DCL CONST[0..255] DCL TEMP[0..4] DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} 0: MUL TEMP[0], IMM[0].xxxx, IN[1] 1: MOV TEMP[1], TEMP[0].yxzw 2: ARR ADDR[0], TEMP[1] 3: MUL TEMP[2], IN[2].yyyy, CONST[ADDR[0].x+7] 4: ARR ADDR[0], TEMP[1] 5: MAD TEMP[2], IN[2].xxxx, CONST[ADDR[0].y+7], TEMP[2] 6: ARR ADDR[0], TEMP[1] 7: MAD TEMP[0], IN[2].zzzz, CONST[ADDR[0].z+7], TEMP[2] 8: ARR ADDR[0], TEMP[1] 9: MAD TEMP[0], IN[2].wwww, CONST[ADDR[0].w+7], TEMP[0] 10: MOV TEMP[2].xyz, CONST[240] 11: MAD TEMP[2].xyz, IN[0], TEMP[2], CONST[239] 12: MOV TEMP[2].w, IMM[0].yyyy 13: DP4 TEMP[0].x, TEMP[2], TEMP[0] 14: MUL TEMP[0], TEMP[0].xxxx, CONST[232] 15: ARR ADDR[0], TEMP[1] 16: MUL TEMP[3], IN[2].yyyy, CONST[ADDR[0].x+6] 17: ARR ADDR[0], TEMP[1] 18: MUL TEMP[4], IN[2].yyyy, CONST[ADDR[0].x+8] 19: ARR ADDR[0], TEMP[1] 20: MAD TEMP[4], IN[2].xxxx, CONST[ADDR[0].y+8], TEMP[4] 21: ARR ADDR[0], TEMP[1] 22: MAD TEMP[3], IN[2].xxxx, CONST[ADDR[0].y+6], TEMP[3] 23: ARR ADDR[0], TEMP[1] 24: MAD TEMP[3], IN[2].zzzz, CONST[ADDR[0].z+6], TEMP[3] 25: ARR ADDR[0], TEMP[1] 26: MAD TEMP[4], IN[2].zzzz, CONST[ADDR[0].z+8], TEMP[4] 27: ARR ADDR[0], TEMP[1] 28: MAD TEMP[4], IN[2].wwww, CONST[ADDR[0].w+8], TEMP[4] 29: ARR ADDR[0], TEMP[1] 30: MAD TEMP[3], IN[2].wwww, CONST[ADDR[0].w+6], TEMP[3] 31: DP4 TEMP[3].x, TEMP[2], TEMP[3] 32: DP4 TEMP[2].x, TEMP[2], TEMP[4] 33: MAD TEMP[0], CONST[231], TEMP[3].xxxx, TEMP[0] 34: MAD TEMP[0], CONST[233], TEMP[2].xxxx, TEMP[0] 35: ADD TEMP[0], TEMP[0], CONST[234] 36: MUL TEMP[2], TEMP[0].yyyy, CONST[236] 37: MAD TEMP[2], CONST[235], TEMP[0].xxxx, TEMP[2] 38: MAD TEMP[2], CONST[237], TEMP[0].zzzz, TEMP[2] 39: MAD TEMP[0], CONST[238], TEMP[0].wwww, TEMP[2] 40: SLT TEMP[2].x, TEMP[0].zzzz, IMM[0].zzzz 41: ABS TEMP[2].y, CONST[243].xxxx 42: SLT TEMP[2].y, -TEMP[2].yyyy, TEMP[2].yyyy 43: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].yyyy 44: LRP TEMP[3].xy, TEMP[2].xxxx, IMM[0].wyzw, TEMP[0].zwzw 45: MOV OUT[0].xy, TEMP[0] 46: MOV TEMP[0].x, CONST[241].xxxx 47: MAD OUT[3].x, TEMP[3].xxxx, TEMP[0].xxxx, CONST[242].xxxx 48: MOV OUT[0].zw, TEMP[3].xyxy 49: MOV OUT[1], IN[4] 50: MOV OUT[2].xy, IN[3] 51: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3696) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3700) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3704) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3708) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3712) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3716) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3720) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3724) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3728) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3732) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3736) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3740) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3744) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3748) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3752) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3756) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3760) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3764) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3768) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3772) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3776) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3780) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3784) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3788) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3792) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3796) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3800) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3804) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3808) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3812) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3816) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3820) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3824) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3828) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3832) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3840) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3844) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3848) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3856) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3872) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3888) %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = add i32 %5, %7 %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %62, i32 0, i32 %63) %65 = extractelement <4 x float> %64, i32 0 %66 = extractelement <4 x float> %64, i32 1 %67 = extractelement <4 x float> %64, i32 2 %68 = extractelement <4 x float> %64, i32 3 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = extractelement <4 x float> %72, i32 3 %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = add i32 %5, %7 %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %79) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 1 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = add i32 %5, %7 %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %85) %87 = extractelement <4 x float> %86, i32 0 %88 = extractelement <4 x float> %86, i32 1 %89 = extractelement <4 x float> %86, i32 2 %90 = extractelement <4 x float> %86, i32 3 %91 = fmul float %65, 3.000000e+00 %92 = fmul float %66, 3.000000e+00 %93 = fmul float %67, 3.000000e+00 %94 = fmul float %68, 3.000000e+00 %95 = call float @llvm.AMDIL.round.nearest.(float %92) %96 = fptosi float %95 to i32 %97 = call float @llvm.AMDIL.round.nearest.(float %91) %98 = call float @llvm.AMDIL.round.nearest.(float %93) %99 = call float @llvm.AMDIL.round.nearest.(float %94) %100 = shl i32 %96, 4 %101 = add i32 %100, 112 %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %101) %103 = fmul float %74, %102 %104 = shl i32 %96, 4 %105 = add i32 %104, 116 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) %107 = fmul float %74, %106 %108 = shl i32 %96, 4 %109 = add i32 %108, 120 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %74, %110 %112 = shl i32 %96, 4 %113 = add i32 %112, 124 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) %115 = fmul float %74, %114 %116 = call float @llvm.AMDIL.round.nearest.(float %92) %117 = call float @llvm.AMDIL.round.nearest.(float %91) %118 = fptosi float %117 to i32 %119 = call float @llvm.AMDIL.round.nearest.(float %93) %120 = call float @llvm.AMDIL.round.nearest.(float %94) %121 = shl i32 %118, 4 %122 = add i32 %121, 112 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = fmul float %73, %123 %125 = fadd float %124, %103 %126 = shl i32 %118, 4 %127 = add i32 %126, 116 %128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %127) %129 = fmul float %73, %128 %130 = fadd float %129, %107 %131 = shl i32 %118, 4 %132 = add i32 %131, 120 %133 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %132) %134 = fmul float %73, %133 %135 = fadd float %134, %111 %136 = shl i32 %118, 4 %137 = add i32 %136, 124 %138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %137) %139 = fmul float %73, %138 %140 = fadd float %139, %115 %141 = call float @llvm.AMDIL.round.nearest.(float %92) %142 = call float @llvm.AMDIL.round.nearest.(float %91) %143 = call float @llvm.AMDIL.round.nearest.(float %93) %144 = fptosi float %143 to i32 %145 = call float @llvm.AMDIL.round.nearest.(float %94) %146 = shl i32 %144, 4 %147 = add i32 %146, 112 %148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %147) %149 = fmul float %75, %148 %150 = fadd float %149, %125 %151 = shl i32 %144, 4 %152 = add i32 %151, 116 %153 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %152) %154 = fmul float %75, %153 %155 = fadd float %154, %130 %156 = shl i32 %144, 4 %157 = add i32 %156, 120 %158 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %157) %159 = fmul float %75, %158 %160 = fadd float %159, %135 %161 = shl i32 %144, 4 %162 = add i32 %161, 124 %163 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %162) %164 = fmul float %75, %163 %165 = fadd float %164, %140 %166 = call float @llvm.AMDIL.round.nearest.(float %92) %167 = call float @llvm.AMDIL.round.nearest.(float %91) %168 = call float @llvm.AMDIL.round.nearest.(float %93) %169 = call float @llvm.AMDIL.round.nearest.(float %94) %170 = fptosi float %169 to i32 %171 = shl i32 %170, 4 %172 = add i32 %171, 112 %173 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %172) %174 = fmul float %76, %173 %175 = fadd float %174, %150 %176 = shl i32 %170, 4 %177 = add i32 %176, 116 %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %177) %179 = fmul float %76, %178 %180 = fadd float %179, %155 %181 = shl i32 %170, 4 %182 = add i32 %181, 120 %183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %182) %184 = fmul float %76, %183 %185 = fadd float %184, %160 %186 = shl i32 %170, 4 %187 = add i32 %186, 124 %188 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %187) %189 = fmul float %76, %188 %190 = fadd float %189, %165 %191 = fmul float %58, %48 %192 = fadd float %191, %45 %193 = fmul float %59, %49 %194 = fadd float %193, %46 %195 = fmul float %60, %50 %196 = fadd float %195, %47 %197 = fmul float %192, %175 %198 = fmul float %194, %180 %199 = fadd float %197, %198 %200 = fmul float %196, %185 %201 = fadd float %199, %200 %202 = fadd float %201, %190 %203 = fmul float %202, %17 %204 = fmul float %202, %18 %205 = fmul float %202, %19 %206 = fmul float %202, %20 %207 = call float @llvm.AMDIL.round.nearest.(float %92) %208 = fptosi float %207 to i32 %209 = call float @llvm.AMDIL.round.nearest.(float %91) %210 = call float @llvm.AMDIL.round.nearest.(float %93) %211 = call float @llvm.AMDIL.round.nearest.(float %94) %212 = shl i32 %208, 4 %213 = add i32 %212, 96 %214 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %213) %215 = fmul float %74, %214 %216 = shl i32 %208, 4 %217 = add i32 %216, 100 %218 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %217) %219 = fmul float %74, %218 %220 = shl i32 %208, 4 %221 = add i32 %220, 104 %222 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %221) %223 = fmul float %74, %222 %224 = shl i32 %208, 4 %225 = add i32 %224, 108 %226 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %225) %227 = fmul float %74, %226 %228 = call float @llvm.AMDIL.round.nearest.(float %92) %229 = fptosi float %228 to i32 %230 = call float @llvm.AMDIL.round.nearest.(float %91) %231 = call float @llvm.AMDIL.round.nearest.(float %93) %232 = call float @llvm.AMDIL.round.nearest.(float %94) %233 = shl i32 %229, 4 %234 = add i32 %233, 128 %235 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %234) %236 = fmul float %74, %235 %237 = shl i32 %229, 4 %238 = add i32 %237, 132 %239 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %238) %240 = fmul float %74, %239 %241 = shl i32 %229, 4 %242 = add i32 %241, 136 %243 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %242) %244 = fmul float %74, %243 %245 = shl i32 %229, 4 %246 = add i32 %245, 140 %247 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %246) %248 = fmul float %74, %247 %249 = call float @llvm.AMDIL.round.nearest.(float %92) %250 = call float @llvm.AMDIL.round.nearest.(float %91) %251 = fptosi float %250 to i32 %252 = call float @llvm.AMDIL.round.nearest.(float %93) %253 = call float @llvm.AMDIL.round.nearest.(float %94) %254 = shl i32 %251, 4 %255 = add i32 %254, 128 %256 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %255) %257 = fmul float %73, %256 %258 = fadd float %257, %236 %259 = shl i32 %251, 4 %260 = add i32 %259, 132 %261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %260) %262 = fmul float %73, %261 %263 = fadd float %262, %240 %264 = shl i32 %251, 4 %265 = add i32 %264, 136 %266 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %265) %267 = fmul float %73, %266 %268 = fadd float %267, %244 %269 = shl i32 %251, 4 %270 = add i32 %269, 140 %271 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %270) %272 = fmul float %73, %271 %273 = fadd float %272, %248 %274 = call float @llvm.AMDIL.round.nearest.(float %92) %275 = call float @llvm.AMDIL.round.nearest.(float %91) %276 = fptosi float %275 to i32 %277 = call float @llvm.AMDIL.round.nearest.(float %93) %278 = call float @llvm.AMDIL.round.nearest.(float %94) %279 = shl i32 %276, 4 %280 = add i32 %279, 96 %281 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %280) %282 = fmul float %73, %281 %283 = fadd float %282, %215 %284 = shl i32 %276, 4 %285 = add i32 %284, 100 %286 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %285) %287 = fmul float %73, %286 %288 = fadd float %287, %219 %289 = shl i32 %276, 4 %290 = add i32 %289, 104 %291 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %290) %292 = fmul float %73, %291 %293 = fadd float %292, %223 %294 = shl i32 %276, 4 %295 = add i32 %294, 108 %296 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %295) %297 = fmul float %73, %296 %298 = fadd float %297, %227 %299 = call float @llvm.AMDIL.round.nearest.(float %92) %300 = call float @llvm.AMDIL.round.nearest.(float %91) %301 = call float @llvm.AMDIL.round.nearest.(float %93) %302 = fptosi float %301 to i32 %303 = call float @llvm.AMDIL.round.nearest.(float %94) %304 = shl i32 %302, 4 %305 = add i32 %304, 96 %306 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %305) %307 = fmul float %75, %306 %308 = fadd float %307, %283 %309 = shl i32 %302, 4 %310 = add i32 %309, 100 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = fmul float %75, %311 %313 = fadd float %312, %288 %314 = shl i32 %302, 4 %315 = add i32 %314, 104 %316 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %315) %317 = fmul float %75, %316 %318 = fadd float %317, %293 %319 = shl i32 %302, 4 %320 = add i32 %319, 108 %321 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %320) %322 = fmul float %75, %321 %323 = fadd float %322, %298 %324 = call float @llvm.AMDIL.round.nearest.(float %92) %325 = call float @llvm.AMDIL.round.nearest.(float %91) %326 = call float @llvm.AMDIL.round.nearest.(float %93) %327 = fptosi float %326 to i32 %328 = call float @llvm.AMDIL.round.nearest.(float %94) %329 = shl i32 %327, 4 %330 = add i32 %329, 128 %331 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %330) %332 = fmul float %75, %331 %333 = fadd float %332, %258 %334 = shl i32 %327, 4 %335 = add i32 %334, 132 %336 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %335) %337 = fmul float %75, %336 %338 = fadd float %337, %263 %339 = shl i32 %327, 4 %340 = add i32 %339, 136 %341 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %340) %342 = fmul float %75, %341 %343 = fadd float %342, %268 %344 = shl i32 %327, 4 %345 = add i32 %344, 140 %346 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %345) %347 = fmul float %75, %346 %348 = fadd float %347, %273 %349 = call float @llvm.AMDIL.round.nearest.(float %92) %350 = call float @llvm.AMDIL.round.nearest.(float %91) %351 = call float @llvm.AMDIL.round.nearest.(float %93) %352 = call float @llvm.AMDIL.round.nearest.(float %94) %353 = fptosi float %352 to i32 %354 = shl i32 %353, 4 %355 = add i32 %354, 128 %356 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %355) %357 = fmul float %76, %356 %358 = fadd float %357, %333 %359 = shl i32 %353, 4 %360 = add i32 %359, 132 %361 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %360) %362 = fmul float %76, %361 %363 = fadd float %362, %338 %364 = shl i32 %353, 4 %365 = add i32 %364, 136 %366 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %365) %367 = fmul float %76, %366 %368 = fadd float %367, %343 %369 = shl i32 %353, 4 %370 = add i32 %369, 140 %371 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %370) %372 = fmul float %76, %371 %373 = fadd float %372, %348 %374 = call float @llvm.AMDIL.round.nearest.(float %92) %375 = call float @llvm.AMDIL.round.nearest.(float %91) %376 = call float @llvm.AMDIL.round.nearest.(float %93) %377 = call float @llvm.AMDIL.round.nearest.(float %94) %378 = fptosi float %377 to i32 %379 = shl i32 %378, 4 %380 = add i32 %379, 96 %381 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %380) %382 = fmul float %76, %381 %383 = fadd float %382, %308 %384 = shl i32 %378, 4 %385 = add i32 %384, 100 %386 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %385) %387 = fmul float %76, %386 %388 = fadd float %387, %313 %389 = shl i32 %378, 4 %390 = add i32 %389, 104 %391 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %390) %392 = fmul float %76, %391 %393 = fadd float %392, %318 %394 = shl i32 %378, 4 %395 = add i32 %394, 108 %396 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %395) %397 = fmul float %76, %396 %398 = fadd float %397, %323 %399 = fmul float %192, %383 %400 = fmul float %194, %388 %401 = fadd float %399, %400 %402 = fmul float %196, %393 %403 = fadd float %401, %402 %404 = fadd float %403, %398 %405 = fmul float %192, %358 %406 = fmul float %194, %363 %407 = fadd float %405, %406 %408 = fmul float %196, %368 %409 = fadd float %407, %408 %410 = fadd float %409, %373 %411 = fmul float %13, %404 %412 = fadd float %411, %203 %413 = fmul float %14, %404 %414 = fadd float %413, %204 %415 = fmul float %15, %404 %416 = fadd float %415, %205 %417 = fmul float %16, %404 %418 = fadd float %417, %206 %419 = fmul float %21, %410 %420 = fadd float %419, %412 %421 = fmul float %22, %410 %422 = fadd float %421, %414 %423 = fmul float %23, %410 %424 = fadd float %423, %416 %425 = fmul float %24, %410 %426 = fadd float %425, %418 %427 = fadd float %420, %25 %428 = fadd float %422, %26 %429 = fadd float %424, %27 %430 = fadd float %426, %28 %431 = fmul float %428, %33 %432 = fmul float %428, %34 %433 = fmul float %428, %35 %434 = fmul float %428, %36 %435 = fmul float %29, %427 %436 = fadd float %435, %431 %437 = fmul float %30, %427 %438 = fadd float %437, %432 %439 = fmul float %31, %427 %440 = fadd float %439, %433 %441 = fmul float %32, %427 %442 = fadd float %441, %434 %443 = fmul float %37, %429 %444 = fadd float %443, %436 %445 = fmul float %38, %429 %446 = fadd float %445, %438 %447 = fmul float %39, %429 %448 = fadd float %447, %440 %449 = fmul float %40, %429 %450 = fadd float %449, %442 %451 = fmul float %41, %430 %452 = fadd float %451, %444 %453 = fmul float %42, %430 %454 = fadd float %453, %446 %455 = fmul float %43, %430 %456 = fadd float %455, %448 %457 = fmul float %44, %430 %458 = fadd float %457, %450 %459 = fcmp olt float %456, 0.000000e+00 %460 = select i1 %459, float 1.000000e+00, float 0.000000e+00 %461 = call float @fabs(float %53) %462 = fsub float -0.000000e+00, %461 %463 = fcmp ogt float %461, %462 %464 = select i1 %463, float 1.000000e+00, float 0.000000e+00 %465 = fmul float %460, %464 %466 = call float @llvm.AMDGPU.lrp(float %465, float 0x3EB0C6F7A0000000, float %456) %467 = call float @llvm.AMDGPU.lrp(float %465, float 1.000000e+00, float %458) %468 = fmul float %466, %51 %469 = fadd float %468, %52 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %87, float %88, float %89, float %90) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %81, float %82, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %469, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %452, float %454, float %466, float %467) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.round.nearest.(float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_movk_i32 s42, 0x74 ; B02A0074 s_movk_i32 s43, 0x70 ; B02B0070 s_movk_i32 s34, 0x64 ; B0220064 s_movk_i32 s39, 0x78 ; B0270078 s_movk_i32 s35, 0x60 ; B0230060 s_movk_i32 s37, 0x84 ; B0250084 s_movk_i32 s41, 0x7c ; B029007C s_movk_i32 s33, 0x68 ; B0210068 s_movk_i32 s40, 0x80 ; B0280080 s_movk_i32 s32, 0x6c ; B020006C s_movk_i32 s38, 0x88 ; B0260088 s_movk_i32 s36, 0x8c ; B024008C s_movk_i32 s4, 0xef4 ; B0040EF4 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_movk_i32 s5, 0xf04 ; B0050F04 s_buffer_load_dword s29, s[0:3], s5 ; C20E8005 s_movk_i32 s5, 0xef0 ; B0050EF0 s_buffer_load_dword s5, s[0:3], s5 ; C2028005 s_movk_i32 s6, 0xf00 ; B0060F00 s_buffer_load_dword s30, s[0:3], s6 ; C20F0006 s_movk_i32 s6, 0xef8 ; B0060EF8 s_buffer_load_dword s7, s[0:3], s6 ; C2038006 s_movk_i32 s6, 0xf08 ; B0060F08 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 s_buffer_load_dword s31, s[0:3], s6 ; C20F8006 s_movk_i32 s4, 0xe84 ; B0040E84 s_buffer_load_dword s6, s[0:3], s4 ; C2030004 s_movk_i32 s4, 0xe74 ; B0040E74 v_mov_b32_e32 v3, s5 ; 7E060205 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_movk_i32 s5, 0xe80 ; B0050E80 s_buffer_load_dword s11, s[0:3], s5 ; C2058005 s_movk_i32 s5, 0xe70 ; B0050E70 v_mov_b32_e32 v4, s7 ; 7E080207 s_buffer_load_dword s10, s[0:3], s5 ; C2050005 s_movk_i32 s5, 0xe88 ; B0050E88 s_buffer_load_dword s12, s[0:3], s5 ; C2060005 s_movk_i32 s5, 0xe94 ; B0050E94 s_buffer_load_dword s5, s[0:3], s5 ; C2028005 s_movk_i32 s7, 0xe78 ; B0070E78 s_buffer_load_dword s13, s[0:3], s7 ; C2068007 s_movk_i32 s7, 0xe8c ; B0070E8C s_buffer_load_dword s16, s[0:3], s7 ; C2080007 s_movk_i32 s7, 0xe90 ; B0070E90 s_buffer_load_dword s14, s[0:3], s7 ; C2070007 s_movk_i32 s7, 0xea4 ; B0070EA4 s_buffer_load_dword s7, s[0:3], s7 ; C2038007 s_movk_i32 s15, 0xe7c ; B00F0E7C s_buffer_load_dword s15, s[0:3], s15 ; C207800F s_movk_i32 s17, 0xe98 ; B0110E98 s_buffer_load_dword s21, s[0:3], s17 ; C20A8011 s_movk_i32 s17, 0xea0 ; B0110EA0 s_buffer_load_dword s19, s[0:3], s17 ; C2098011 s_movk_i32 s17, 0xec8 ; B0110EC8 s_buffer_load_dword s17, s[0:3], s17 ; C2088011 s_movk_i32 s18, 0xe9c ; B0120E9C s_buffer_load_dword s23, s[0:3], s18 ; C20B8012 s_movk_i32 s18, 0xea8 ; B0120EA8 s_buffer_load_dword s24, s[0:3], s18 ; C20C0012 s_movk_i32 s18, 0xeb8 ; B0120EB8 s_buffer_load_dword s22, s[0:3], s18 ; C20B0012 s_movk_i32 s18, 0xeac ; B0120EAC s_buffer_load_dword s27, s[0:3], s18 ; C20D8012 s_movk_i32 s18, 0xed8 ; B0120ED8 s_buffer_load_dword s25, s[0:3], s18 ; C20C8012 s_movk_i32 s18, 0xee8 ; B0120EE8 s_buffer_load_dword s26, s[0:3], s18 ; C20D0012 s_movk_i32 s18, 0xf30 ; B0120F30 s_buffer_load_dword s28, s[0:3], s18 ; C20E0012 s_movk_i32 s18, 0xecc ; B0120ECC s_buffer_load_dword s20, s[0:3], s18 ; C20A0012 s_movk_i32 s18, 0xebc ; B0120EBC s_buffer_load_dword s18, s[0:3], s18 ; C2090012 s_load_dwordx4 s[44:47], s[8:9], 0x0 ; C0960900 s_load_dwordx4 s[48:51], s[8:9], 0x4 ; C0980904 s_load_dwordx4 s[52:55], s[8:9], 0x8 ; C09A0908 s_load_dwordx4 s[56:59], s[8:9], 0xc ; C09C090C s_load_dwordx4 s[60:63], s[8:9], 0x10 ; C09E0910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[5:8], v0, s[44:47], 0 idxen ; E00C2000 800B0500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[48:51], 0 idxen ; E00C2000 800C0800 buffer_load_format_xyzw v[12:15], v0, s[52:55], 0 idxen ; E00C2000 800D0C00 buffer_load_format_xyzw v[16:19], v0, s[56:59], 0 idxen ; E00C2000 800E1000 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[18:21], v0, s[60:63], 0 idxen ; E00C2000 800F1200 v_mul_f32_e32 v0, 0x40400000, v9 ; 100012FF 40400000 v_rndne_f32_e32 v0, v0 ; 7E004700 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_mul_f32_e32 v8, 0x40400000, v8 ; 101010FF 40400000 v_rndne_f32_e32 v8, v8 ; 7E104708 v_cvt_i32_f32_e32 v8, v8 ; 7E101108 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_mul_f32_e32 v9, 0x40400000, v10 ; 101214FF 40400000 v_mul_f32_e32 v10, 0x40400000, v11 ; 101416FF 40400000 v_rndne_f32_e32 v9, v9 ; 7E124709 v_rndne_f32_e32 v10, v10 ; 7E14470A v_cvt_i32_f32_e32 v9, v9 ; 7E121109 v_cvt_i32_f32_e32 v10, v10 ; 7E14110A v_lshlrev_b32_e32 v8, 4, v8 ; 34101084 v_add_i32_e32 v11, s42, v0 ; 4A16002A buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B v_lshlrev_b32_e32 v9, 4, v9 ; 34121284 v_lshlrev_b32_e32 v10, 4, v10 ; 34141484 v_add_i32_e32 v22, s43, v0 ; 4A2C002B buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 v_add_i32_e32 v23, s42, v8 ; 4A2E102A buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_add_i32_e32 v24, s34, v0 ; 4A300022 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 v_add_i32_e32 v25, s39, v0 ; 4A320027 buffer_load_dword v25, v25, s[0:3], 0 offen ; E0301000 80001919 v_add_i32_e32 v26, s43, v8 ; 4A34102B buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A v_add_i32_e32 v27, s42, v9 ; 4A36122A buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B v_add_i32_e32 v28, s35, v0 ; 4A380023 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C v_add_i32_e32 v29, s37, v0 ; 4A3A0025 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D v_add_i32_e32 v30, s34, v8 ; 4A3C1022 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E v_add_i32_e32 v31, s41, v0 ; 4A3E0029 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F v_add_i32_e32 v32, s39, v8 ; 4A401027 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 v_add_i32_e32 v33, s43, v9 ; 4A42122B buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 v_add_i32_e32 v34, s42, v10 ; 4A44142A buffer_load_dword v34, v34, s[0:3], 0 offen ; E0301000 80002222 v_add_i32_e32 v35, s33, v0 ; 4A460021 buffer_load_dword v35, v35, s[0:3], 0 offen ; E0301000 80002323 v_add_i32_e32 v36, s40, v0 ; 4A480028 buffer_load_dword v36, v36, s[0:3], 0 offen ; E0301000 80002424 v_add_i32_e32 v37, s37, v8 ; 4A4A1025 buffer_load_dword v37, v37, s[0:3], 0 offen ; E0301000 80002525 v_add_i32_e32 v38, s35, v8 ; 4A4C1023 buffer_load_dword v38, v38, s[0:3], 0 offen ; E0301000 80002626 v_add_i32_e32 v39, s34, v9 ; 4A4E1222 buffer_load_dword v39, v39, s[0:3], 0 offen ; E0301000 80002727 v_add_i32_e32 v40, s41, v8 ; 4A501029 buffer_load_dword v40, v40, s[0:3], 0 offen ; E0301000 80002828 v_add_i32_e32 v41, s39, v9 ; 4A521227 buffer_load_dword v41, v41, s[0:3], 0 offen ; E0301000 80002929 v_add_i32_e32 v42, s43, v10 ; 4A54142B buffer_load_dword v42, v42, s[0:3], 0 offen ; E0301000 80002A2A v_add_i32_e32 v43, s32, v0 ; 4A560020 buffer_load_dword v43, v43, s[0:3], 0 offen ; E0301000 80002B2B v_add_i32_e32 v44, s38, v0 ; 4A580026 buffer_load_dword v44, v44, s[0:3], 0 offen ; E0301000 80002C2C v_add_i32_e32 v0, s36, v0 ; 4A000024 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 v_add_i32_e32 v45, s40, v8 ; 4A5A1028 buffer_load_dword v45, v45, s[0:3], 0 offen ; E0301000 80002D2D v_add_i32_e32 v46, s33, v8 ; 4A5C1021 v_add_i32_e32 v47, s35, v9 ; 4A5E1223 v_add_i32_e32 v48, s37, v9 ; 4A601225 v_add_i32_e32 v49, s41, v9 ; 4A621229 v_add_i32_e32 v50, s38, v8 ; 4A641026 buffer_load_dword v50, v50, s[0:3], 0 offen ; E0301000 80003232 v_add_i32_e32 v51, s32, v8 ; 4A661020 v_add_i32_e32 v8, s36, v8 ; 4A101024 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_add_i32_e32 v52, s33, v9 ; 4A681221 v_add_i32_e32 v53, s40, v9 ; 4A6A1228 v_add_i32_e32 v54, s32, v9 ; 4A6C1220 v_add_i32_e32 v55, s38, v9 ; 4A6E1226 v_add_i32_e32 v9, s36, v9 ; 4A121224 buffer_load_dword v49, v49, s[0:3], 0 offen ; E0301000 80003131 buffer_load_dword v46, v46, s[0:3], 0 offen ; E0301000 80002E2E buffer_load_dword v51, v51, s[0:3], 0 offen ; E0301000 80003333 buffer_load_dword v47, v47, s[0:3], 0 offen ; E0301000 80002F2F buffer_load_dword v52, v52, s[0:3], 0 offen ; E0301000 80003434 buffer_load_dword v54, v54, s[0:3], 0 offen ; E0301000 80003636 buffer_load_dword v53, v53, s[0:3], 0 offen ; E0301000 80003535 buffer_load_dword v48, v48, s[0:3], 0 offen ; E0301000 80003030 buffer_load_dword v55, v55, s[0:3], 0 offen ; E0301000 80003737 buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_add_i32_e32 v56, s39, v10 ; 4A701427 v_add_i32_e32 v57, s41, v10 ; 4A721429 v_add_i32_e32 v58, s40, v10 ; 4A741428 v_add_i32_e32 v59, s37, v10 ; 4A761425 v_add_i32_e32 v60, s38, v10 ; 4A781426 v_add_i32_e32 v61, s36, v10 ; 4A7A1424 v_add_i32_e32 v62, s35, v10 ; 4A7C1423 v_add_i32_e32 v63, s34, v10 ; 4A7E1422 v_add_i32_e32 v64, s33, v10 ; 4A801421 v_add_i32_e32 v10, s32, v10 ; 4A141420 buffer_load_dword v56, v56, s[0:3], 0 offen ; E0301000 80003838 buffer_load_dword v57, v57, s[0:3], 0 offen ; E0301000 80003939 buffer_load_dword v58, v58, s[0:3], 0 offen ; E0301000 80003A3A buffer_load_dword v59, v59, s[0:3], 0 offen ; E0301000 80003B3B buffer_load_dword v60, v60, s[0:3], 0 offen ; E0301000 80003C3C buffer_load_dword v61, v61, s[0:3], 0 offen ; E0301000 80003D3D buffer_load_dword v62, v62, s[0:3], 0 offen ; E0301000 80003E3E buffer_load_dword v63, v63, s[0:3], 0 offen ; E0301000 80003F3F buffer_load_dword v64, v64, s[0:3], 0 offen ; E0301000 80004040 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt ; BF8C077F v_mul_f32_e32 v22, v22, v13 ; 102C1B16 v_mad_f32 v22, v12, v26, v22 ; D2820016 045A350C v_mul_f32_e32 v11, v11, v13 ; 10161B0B v_mad_f32 v11, v12, v23, v11 ; D282000B 042E2F0C v_mul_f32_e32 v23, v25, v13 ; 102E1B19 v_mad_f32 v23, v12, v32, v23 ; D2820017 045E410C v_mul_f32_e32 v25, v31, v13 ; 10321B1F v_mad_f32 v25, v12, v40, v25 ; D2820019 0466510C v_mul_f32_e32 v26, v36, v13 ; 10341B24 v_mad_f32 v26, v12, v45, v26 ; D282001A 046A5B0C v_mul_f32_e32 v29, v29, v13 ; 103A1B1D v_mad_f32 v29, v12, v37, v29 ; D282001D 04764B0C v_mul_f32_e32 v31, v44, v13 ; 103E1B2C v_mad_f32 v31, v12, v50, v31 ; D282001F 047E650C v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mad_f32 v0, v12, v8, v0 ; D2820000 0402110C v_mul_f32_e32 v8, v28, v13 ; 10101B1C v_mad_f32 v8, v12, v38, v8 ; D2820008 04224D0C v_mul_f32_e32 v24, v24, v13 ; 10301B18 v_mad_f32 v24, v12, v30, v24 ; D2820018 04623D0C v_mul_f32_e32 v28, v35, v13 ; 10381B23 v_mad_f32 v28, v12, v46, v28 ; D282001C 04725D0C v_mul_f32_e32 v13, v43, v13 ; 101A1B2B v_mad_f32 v12, v12, v51, v13 ; D282000C 0436670C v_mad_f32 v13, v14, v33, v22 ; D282000D 045A430E v_mad_f32 v11, v14, v27, v11 ; D282000B 042E370E v_mad_f32 v22, v14, v41, v23 ; D2820016 045E530E v_mad_f32 v23, v14, v49, v25 ; D2820017 0466630E v_mad_f32 v8, v14, v47, v8 ; D2820008 04225F0E v_mad_f32 v24, v14, v39, v24 ; D2820018 04624F0E v_mad_f32 v25, v14, v52, v28 ; D2820019 0472690E s_waitcnt vmcnt(14) ; BF8C077E v_mad_f32 v12, v14, v54, v12 ; D282000C 04326D0E s_waitcnt vmcnt(13) ; BF8C077D v_mad_f32 v26, v14, v53, v26 ; D282001A 046A6B0E s_waitcnt vmcnt(12) ; BF8C077C v_mad_f32 v27, v14, v48, v29 ; D282001B 0476610E s_waitcnt vmcnt(11) ; BF8C077B v_mad_f32 v28, v14, v55, v31 ; D282001C 047E6F0E s_waitcnt vmcnt(10) ; BF8C077A v_mad_f32 v0, v14, v9, v0 ; D2820000 0402130E v_mad_f32 v9, v15, v42, v13 ; D2820009 0436550F v_mad_f32 v11, v15, v34, v11 ; D282000B 042E450F s_waitcnt vmcnt(9) ; BF8C0779 v_mad_f32 v13, v15, v56, v22 ; D282000D 045A710F s_waitcnt vmcnt(8) ; BF8C0778 v_mad_f32 v14, v15, v57, v23 ; D282000E 045E730F s_waitcnt vmcnt(7) ; BF8C0777 v_mad_f32 v22, v15, v58, v26 ; D2820016 046A750F s_waitcnt vmcnt(6) ; BF8C0776 v_mad_f32 v23, v15, v59, v27 ; D2820017 046E770F s_waitcnt vmcnt(5) ; BF8C0775 v_mad_f32 v26, v15, v60, v28 ; D282001A 0472790F s_waitcnt vmcnt(4) ; BF8C0774 v_mad_f32 v0, v15, v61, v0 ; D2820000 04027B0F s_waitcnt vmcnt(3) ; BF8C0773 v_mad_f32 v8, v15, v62, v8 ; D2820008 04227D0F s_waitcnt vmcnt(2) ; BF8C0772 v_mad_f32 v24, v15, v63, v24 ; D2820018 04627F0F s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v25, v15, v64, v25 ; D2820019 0466810F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v15, v10, v12 ; D282000A 0432150F v_mad_f32 v3, s30, v5, v3 ; D2820003 040E0A1E v_mad_f32 v2, s29, v6, v2 ; D2820002 040A0C1D v_mad_f32 v4, s31, v7, v4 ; D2820004 04120E1F exp 15, 32, 0, 0, 0, v18, v19, v20, v21 ; F800020F 15141312 s_movk_i32 s8, 0xeb0 ; B0080EB0 s_buffer_load_dword s8, s[0:3], s8 ; C2040008 s_movk_i32 s9, 0xeb4 ; B0090EB4 s_buffer_load_dword s9, s[0:3], s9 ; C2048009 s_movk_i32 s29, 0xec0 ; B01D0EC0 s_buffer_load_dword s29, s[0:3], s29 ; C20E801D s_movk_i32 s30, 0xec4 ; B01E0EC4 s_buffer_load_dword s30, s[0:3], s30 ; C20F001E s_movk_i32 s31, 0xed0 ; B01F0ED0 s_buffer_load_dword s31, s[0:3], s31 ; C20F801F s_movk_i32 s32, 0xed4 ; B0200ED4 s_buffer_load_dword s32, s[0:3], s32 ; C2100020 s_movk_i32 s33, 0xedc ; B0210EDC s_buffer_load_dword s33, s[0:3], s33 ; C2108021 s_movk_i32 s34, 0xee0 ; B0220EE0 s_buffer_load_dword s34, s[0:3], s34 ; C2110022 s_movk_i32 s35, 0xee4 ; B0230EE4 s_buffer_load_dword s35, s[0:3], s35 ; C2118023 s_movk_i32 s36, 0xeec ; B0240EEC s_buffer_load_dword s36, s[0:3], s36 ; C2120024 s_movk_i32 s37, 0xf10 ; B0250F10 s_buffer_load_dword s37, s[0:3], s37 ; C2128025 s_movk_i32 s38, 0xf20 ; B0260F20 s_buffer_load_dword s0, s[0:3], s38 ; C2000026 exp 15, 33, 0, 0, 0, v16, v17, v1, v1 ; F800021F 01011110 v_mul_f32_e32 v5, v11, v2 ; 100A050B v_mul_f32_e32 v6, v24, v2 ; 100C0518 v_mul_f32_e32 v2, v23, v2 ; 10040517 v_mad_f32 v5, v3, v9, v5 ; D2820005 04161303 v_mad_f32 v6, v3, v8, v6 ; D2820006 041A1103 v_mad_f32 v2, v3, v22, v2 ; D2820002 040A2D03 v_mad_f32 v3, v4, v13, v5 ; D2820003 04161B04 v_mad_f32 v5, v4, v25, v6 ; D2820005 041A3304 v_mad_f32 v2, v4, v26, v2 ; D2820002 040A3504 v_add_f32_e32 v3, v14, v3 ; 0606070E v_mul_f32_e32 v4, s11, v3 ; 1008060B v_mul_f32_e32 v6, s6, v3 ; 100C0606 v_mul_f32_e32 v7, s12, v3 ; 100E060C v_mul_f32_e32 v3, s16, v3 ; 10060610 v_add_f32_e32 v5, v10, v5 ; 060A0B0A v_mad_f32 v4, s10, v5, v4 ; D2820004 04120A0A v_mad_f32 v6, s4, v5, v6 ; D2820006 041A0A04 v_mad_f32 v7, s13, v5, v7 ; D2820007 041E0A0D v_mad_f32 v3, s15, v5, v3 ; D2820003 040E0A0F v_add_f32_e32 v0, v0, v2 ; 06000500 v_mad_f32 v2, s14, v0, v4 ; D2820002 0412000E v_mad_f32 v4, s5, v0, v6 ; D2820004 041A0005 v_mad_f32 v5, s21, v0, v7 ; D2820005 041E0015 v_mad_f32 v0, s23, v0, v3 ; D2820000 040E0017 v_cmp_gt_f32_e64 s[2:3], |s28|, -|s28| ; D0080302 4000381C v_cndmask_b32_e64 v3, 0, 1.0, s[2:3] ; D2000003 0009E480 v_add_f32_e32 v2, s19, v2 ; 06040413 v_add_f32_e32 v4, s7, v4 ; 06080807 v_add_f32_e32 v5, s24, v5 ; 060A0A18 v_add_f32_e32 v0, s27, v0 ; 0600001B v_mul_f32_e32 v6, s17, v4 ; 100C0811 v_mad_f32 v6, s22, v2, v6 ; D2820006 041A0416 v_mad_f32 v6, s25, v5, v6 ; D2820006 041A0A19 v_mad_f32 v6, s26, v0, v6 ; D2820006 041A001A v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v7, 0, 1.0, vcc ; D2000007 01A9E480 v_mul_f32_e32 v8, v3, v7 ; 10100F03 v_mad_f32 v3, -v7, v3, 1.0 ; D2820003 23CA0707 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v7, s0 ; 7E0E0200 v_mul_f32_e32 v6, v6, v3 ; 100C0706 v_madmk_f32_e32 v6, v8, v6, 0x358637bd ; 400C0D08 358637BD v_mad_f32 v7, s37, v6, v7 ; D2820007 041E0C25 exp 15, 34, 0, 0, 0, v7, v1, v1, v1 ; F800022F 01010107 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s29, v4 ; 1002081D v_mul_f32_e32 v7, s30, v4 ; 100E081E v_mul_f32_e32 v4, s20, v4 ; 10080814 v_mad_f32 v1, s8, v2, v1 ; D2820001 04060408 v_mad_f32 v7, s9, v2, v7 ; D2820007 041E0409 v_mad_f32 v2, s18, v2, v4 ; D2820002 04120412 v_mad_f32 v1, s31, v5, v1 ; D2820001 04060A1F v_mad_f32 v4, s32, v5, v7 ; D2820004 041E0A20 v_mad_f32 v2, s33, v5, v2 ; D2820002 040A0A21 v_mad_f32 v1, s34, v0, v1 ; D2820001 04060022 v_mad_f32 v4, s35, v0, v4 ; D2820004 04120023 v_mad_f32 v0, s36, v0, v2 ; D2820000 040A0024 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mad_f32 v0, 1.0, v8, v0 ; D2820000 040210F2 exp 15, 12, 0, 1, 0, v1, v4, v6, v0 ; F80008CF 00060401 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 68 Code Size: 1960 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0] IMM[0] FLT32 { -0.3333, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[0], SAMP[0], 2D 1: ADD TEMP[0], TEMP[0].xxxx, IMM[0].xxxx 2: KILL_IF TEMP[0] 3: MOV OUT[0], IN[1].xxxx 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %26 to i32 %30 = bitcast float %27 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = fadd float %34, 0xBFD554C980000000 %36 = fadd float %34, 0xBFD554C980000000 %37 = fadd float %34, 0xBFD554C980000000 %38 = fadd float %34, 0xBFD554C980000000 %39 = fcmp olt float %35, 0.000000e+00 %40 = fcmp olt float %36, 0.000000e+00 %41 = fcmp olt float %37, 0.000000e+00 %42 = fcmp olt float %38, 0.000000e+00 %43 = or i1 %42, %41 %44 = or i1 %43, %40 %45 = or i1 %44, %39 %46 = select i1 %45, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %46) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %28, float %28, float %28, float %28) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 0, 1, [m0] ; C8000400 v_interp_p2_f32 v0, [v0], v1, 0, 1, [m0] ; C8010401 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800100 00030102 v_mov_b32_e32 v2, 0xbeaaa64c ; 7E0402FF BEAAA64C s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v1, v1, v2 ; 06020501 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 exp 15, 0, 0, 1, 1, v0, v0, v0, v0 ; F800180F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 120 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..6] DCL TEMP[0] 0: TEX TEMP[0], IN[0], SAMP[0], 2D 1: MUL TEMP[0].xyz, TEMP[0], CONST[6] 2: MAD OUT[0].xyz, IN[1], TEMP[0], CONST[0] 3: TEX TEMP[0], IN[0], SAMP[1], 2D 4: MUL TEMP[0].x, TEMP[0].xxxx, CONST[6].wwww 5: MUL OUT[0].w, TEMP[0].xxxx, IN[1].wwww 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %36 = bitcast <8 x i32> addrspace(2)* %35 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %39 = bitcast <4 x i32> addrspace(2)* %38 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %47 = bitcast float %41 to i32 %48 = bitcast float %42 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %32, <16 x i8> %34, i32 2) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = fmul float %52, %27 %56 = fmul float %53, %28 %57 = fmul float %54, %29 %58 = fmul float %43, %55 %59 = fadd float %58, %24 %60 = fmul float %44, %56 %61 = fadd float %60, %25 %62 = fmul float %45, %57 %63 = fadd float %62, %26 %64 = bitcast float %41 to i32 %65 = bitcast float %42 to i32 %66 = insertelement <2 x i32> undef, i32 %64, i32 0 %67 = insertelement <2 x i32> %66, i32 %65, i32 1 %68 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %67, <32 x i8> %37, <16 x i8> %40, i32 2) %69 = extractelement <4 x float> %68, i32 0 %70 = fmul float %69, %30 %71 = fmul float %70, %46 %72 = call i32 @llvm.SI.packf16(float %59, float %61) %73 = bitcast i32 %72 to float %74 = call i32 @llvm.SI.packf16(float %63, float %71) %75 = bitcast i32 %74 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %73, float %75, float %73, float %75) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_buffer_load_dword s13, s[0:3], 0x1a ; C206811A s_buffer_load_dword s0, s[0:3], 0x1b ; C200011B v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[16:19] ; F0800700 00860702 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[20:23] ; F0800100 00A80102 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v2, s11, v7 ; 10040E0B v_mul_f32_e32 v3, s12, v8 ; 1006100C v_mul_f32_e32 v7, s13, v9 ; 100E120D v_mad_f32 v2, v4, v2, s8 ; D2820002 00220504 v_mad_f32 v3, v5, v3, s9 ; D2820003 00260705 v_mad_f32 v4, v6, v7, s10 ; D2820004 002A0F06 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 200 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[5], PERSPECTIVE DCL IN[4], TEXCOORD[6], PERSPECTIVE DCL IN[5], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..6] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[4], IN[4] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[4], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[2], IN[2] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[2], TEMP[0].xxxx 8: TEX TEMP[3], IN[1], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: TEX TEMP[3], IN[0], SAMP[4], 2D 18: ADD TEMP[1].w, TEMP[3].xxxx, CONST[8].xxxx 19: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[8].yyyy 20: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 21: POW TEMP[3].x, |TEMP[1].wwww|, CONST[8].zzzz 22: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].xxxx 23: DP3 TEMP[1].w, IN[3], IN[3] 24: RSQ TEMP[0], |TEMP[1].wwww| 25: MIN TEMP[2].w, IMM[1].wwww, TEMP[0] 26: MUL TEMP[5].xyz, TEMP[2].wwww, IN[3] 27: DP3 TEMP[2].w, TEMP[5], -CONST[9] 28: ADD TEMP[2].w, TEMP[2].wwww, -CONST[10].xxxx 29: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[10].yyyy 30: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 31: ADD TEMP[1].w, -TEMP[1].wwww, -IMM[0].yyyy 32: MUL TEMP[3].w, TEMP[3].zzzz, TEMP[1].wwww 33: MUL TEMP[5], TEMP[2].wwww, TEMP[3].wwww 34: MOV TEMP[6], TEMP[5] 35: KILL_IF TEMP[6] 36: UIF CONST[240].xxxx :42 37: RCP TEMP[3].w, IN[5].wwww 38: MUL TEMP[5].xy, TEMP[3].wwww, IN[5] 39: MAD TEMP[5].xy, TEMP[5], CONST[1], CONST[1].wzzw 40: TEX TEMP[6], TEMP[5], SAMP[0], 2D 41: MUL TEMP[3].xyz, TEMP[3].zzzz, TEMP[6] 42: ENDIF 43: MOV TEMP[5].y, IMM[0].yyyy 44: ADD TEMP[5].xyz, -TEMP[5].yyyy, -CONST[0] 45: TEX TEMP[6], IN[1], SAMP[2], 2D 46: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 47: MAD TEMP[5].xyz, TEMP[5], CONST[4].wwww, CONST[4] 48: MAX TEMP[3].w, TEMP[1].wwww, IMM[0].wwww 49: ADD TEMP[1].w, TEMP[3].wwww, IMM[0].zzzz 50: POW TEMP[4].w, |TEMP[3].wwww|, CONST[7].wwww 51: TEX TEMP[6], IN[1], SAMP[3], 2D 52: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 53: DP3_SAT TEMP[3].w, TEMP[4], TEMP[2] 54: ADD TEMP[4].x, TEMP[3].wwww, IMM[0].zzzz 55: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 56: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].zzzz 57: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[6].zzzz 58: MOV TEMP[1].z, CONST[6].zzzz 59: ADD TEMP[1].x, TEMP[1].zzzz, IMM[1].xxxx 60: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 61: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 62: MUL TEMP[2].xyz, TEMP[3].wwww, TEMP[5] 63: CMP TEMP[2].xyz, TEMP[4].xxxx, IMM[0].wwww, TEMP[2] 64: MUL TEMP[4].xyz, TEMP[6], TEMP[1].xxxx 65: CMP TEMP[1], TEMP[1].yyyw, IMM[0].wwww, TEMP[4] 66: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 67: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 68: MUL TEMP[1].xyz, TEMP[3], TEMP[1] 69: MUL TEMP[1].xyz, TEMP[1], CONST[7] 70: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 71: CMP OUT[0].xyz, -TEMP[5].wwww, TEMP[1], IMM[0].wwww 72: MOV OUT[0].w, IMM[0].wwww 73: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %49 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %72 = bitcast <8 x i32> addrspace(2)* %71 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %75 = bitcast <4 x i32> addrspace(2)* %74 to <16 x i8> addrspace(2)* %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %90 = fmul float %87, %87 %91 = fmul float %88, %88 %92 = fadd float %91, %90 %93 = fmul float %89, %89 %94 = fadd float %92, %93 %95 = call float @llvm.AMDGPU.rsq.clamped.f32(float %94) %96 = call float @llvm.minnum.f32(float %95, float 0x47EFFFFFE0000000) %97 = fmul float %87, %96 %98 = fmul float %88, %96 %99 = fmul float %89, %96 %100 = fmul float %81, %81 %101 = fmul float %82, %82 %102 = fadd float %101, %100 %103 = fmul float %83, %83 %104 = fadd float %102, %103 %105 = call float @llvm.AMDGPU.rsq.clamped.f32(float %104) %106 = call float @llvm.minnum.f32(float %105, float 0x47EFFFFFE0000000) %107 = fmul float %81, %106 %108 = fmul float %82, %106 %109 = fmul float %83, %106 %110 = bitcast float %79 to i32 %111 = bitcast float %80 to i32 %112 = insertelement <2 x i32> undef, i32 %110, i32 0 %113 = insertelement <2 x i32> %112, i32 %111, i32 1 %114 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %55, <16 x i8> %58, i32 2) %115 = extractelement <4 x float> %114, i32 0 %116 = extractelement <4 x float> %114, i32 1 %117 = extractelement <4 x float> %114, i32 2 %118 = fmul float %115, 2.000000e+00 %119 = fadd float %118, -1.000000e+00 %120 = fmul float %116, 2.000000e+00 %121 = fadd float %120, -1.000000e+00 %122 = fmul float %117, 2.000000e+00 %123 = fadd float %122, -1.000000e+00 %124 = fmul float %119, %119 %125 = fmul float %121, %121 %126 = fadd float %125, %124 %127 = fmul float %123, %123 %128 = fadd float %126, %127 %129 = call float @llvm.AMDGPU.rsq.clamped.f32(float %128) %130 = call float @llvm.minnum.f32(float %129, float 0x47EFFFFFE0000000) %131 = fmul float %119, %130 %132 = fmul float %121, %130 %133 = fmul float %123, %130 %134 = fmul float %131, %97 %135 = fmul float %132, %98 %136 = fadd float %135, %134 %137 = fmul float %133, %99 %138 = fadd float %136, %137 %139 = fmul float %138, %131 %140 = fmul float %138, %132 %141 = fmul float %138, %133 %142 = fmul float %139, 2.000000e+00 %143 = fsub float %142, %97 %144 = fmul float %140, 2.000000e+00 %145 = fsub float %144, %98 %146 = fmul float %141, 2.000000e+00 %147 = fsub float %146, %99 %148 = bitcast float %77 to i32 %149 = bitcast float %78 to i32 %150 = insertelement <2 x i32> undef, i32 %148, i32 0 %151 = insertelement <2 x i32> %150, i32 %149, i32 1 %152 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %151, <32 x i8> %73, <16 x i8> %76, i32 2) %153 = extractelement <4 x float> %152, i32 0 %154 = fadd float %153, %40 %155 = fmul float %154, %41 %156 = call float @llvm.AMDIL.clamp.(float %155, float 0.000000e+00, float 1.000000e+00) %157 = fadd float %156, 0xBEB0C6F7A0000000 %158 = call float @fabs(float %156) %159 = call float @llvm.pow.f32(float %158, float %42) %160 = call float @llvm.AMDGPU.cndlt(float %157, float 0.000000e+00, float %159) %161 = call float @llvm.AMDGPU.cndlt(float %157, float 0.000000e+00, float %159) %162 = call float @llvm.AMDGPU.cndlt(float %157, float 0.000000e+00, float %159) %163 = fmul float %84, %84 %164 = fmul float %85, %85 %165 = fadd float %164, %163 %166 = fmul float %86, %86 %167 = fadd float %165, %166 %168 = call float @fabs(float %167) %169 = call float @llvm.AMDGPU.rsq.clamped.f32(float %168) %170 = call float @llvm.minnum.f32(float %169, float 0x47EFFFFFE0000000) %171 = fmul float %170, %84 %172 = fmul float %170, %85 %173 = fmul float %170, %86 %174 = fmul float %43, %171 %175 = fsub float -0.000000e+00, %174 %176 = fmul float %44, %172 %177 = fsub float %175, %176 %178 = fmul float %45, %173 %179 = fsub float %177, %178 %180 = fsub float %179, %46 %181 = fmul float %180, %47 %182 = call float @llvm.AMDIL.clamp.(float %181, float 0.000000e+00, float 1.000000e+00) %183 = fmul float %182, %182 %184 = fsub float 1.000000e+00, %167 %185 = fmul float %162, %184 %186 = fmul float %183, %185 %187 = fmul float %183, %185 %188 = fmul float %183, %185 %189 = fmul float %183, %185 %190 = fcmp olt float %186, 0.000000e+00 %191 = fcmp olt float %187, 0.000000e+00 %192 = fcmp olt float %188, 0.000000e+00 %193 = fcmp olt float %189, 0.000000e+00 %194 = or i1 %193, %192 %195 = or i1 %194, %191 %196 = or i1 %195, %190 %197 = select i1 %196, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %197) %198 = bitcast float %48 to i32 %199 = icmp eq i32 %198, 0 br i1 %199, label %ENDIF, label %IF IF: ; preds = %main_body %200 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %201 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %202 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %203 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %204 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %205 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %206 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %207 = fdiv float 1.000000e+00, %200 %208 = fmul float %207, %202 %209 = fmul float %207, %201 %210 = fmul float %208, %206 %211 = fadd float %210, %203 %212 = fmul float %209, %205 %213 = fadd float %212, %204 %214 = bitcast float %211 to i32 %215 = bitcast float %213 to i32 %216 = insertelement <2 x i32> undef, i32 %214, i32 0 %217 = insertelement <2 x i32> %216, i32 %215, i32 1 %218 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %217, <32 x i8> %50, <16 x i8> %52, i32 2) %219 = extractelement <4 x float> %218, i32 0 %220 = extractelement <4 x float> %218, i32 1 %221 = extractelement <4 x float> %218, i32 2 %222 = fmul float %162, %219 %223 = fmul float %162, %220 %224 = fmul float %162, %221 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp12.0 = phi float [ %222, %IF ], [ %160, %main_body ] %temp13.0 = phi float [ %223, %IF ], [ %161, %main_body ] %temp14.0 = phi float [ %224, %IF ], [ %162, %main_body ] %225 = fsub float 1.000000e+00, %24 %226 = fsub float 1.000000e+00, %25 %227 = fsub float 1.000000e+00, %26 %228 = bitcast float %79 to i32 %229 = bitcast float %80 to i32 %230 = insertelement <2 x i32> undef, i32 %228, i32 0 %231 = insertelement <2 x i32> %230, i32 %229, i32 1 %232 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %231, <32 x i8> %61, <16 x i8> %64, i32 2) %233 = extractelement <4 x float> %232, i32 0 %234 = extractelement <4 x float> %232, i32 1 %235 = extractelement <4 x float> %232, i32 2 %236 = fmul float %225, %233 %237 = fmul float %226, %234 %238 = fmul float %227, %235 %239 = fmul float %236, %30 %240 = fadd float %239, %27 %241 = fmul float %237, %30 %242 = fadd float %241, %28 %243 = fmul float %238, %30 %244 = fadd float %243, %29 %245 = call float @llvm.maxnum.f32(float %184, float 0.000000e+00) %246 = fadd float %245, 0xBEB0C6F7A0000000 %247 = call float @fabs(float %245) %248 = call float @llvm.pow.f32(float %247, float %39) %249 = bitcast float %79 to i32 %250 = bitcast float %80 to i32 %251 = insertelement <2 x i32> undef, i32 %249, i32 0 %252 = insertelement <2 x i32> %251, i32 %250, i32 1 %253 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %252, <32 x i8> %67, <16 x i8> %70, i32 2) %254 = extractelement <4 x float> %253, i32 0 %255 = extractelement <4 x float> %253, i32 1 %256 = extractelement <4 x float> %253, i32 2 %257 = fmul float %254, %34 %258 = fadd float %257, %31 %259 = fmul float %255, %34 %260 = fadd float %259, %32 %261 = fmul float %256, %34 %262 = fadd float %261, %33 %263 = fmul float %131, %107 %264 = fmul float %132, %108 %265 = fadd float %264, %263 %266 = fmul float %133, %109 %267 = fadd float %265, %266 %268 = call float @llvm.AMDIL.clamp.(float %267, float 0.000000e+00, float 1.000000e+00) %269 = fadd float %268, 0xBEB0C6F7A0000000 %270 = fmul float %143, %107 %271 = fmul float %145, %108 %272 = fadd float %271, %270 %273 = fmul float %147, %109 %274 = fadd float %272, %273 %275 = call float @llvm.AMDIL.clamp.(float %274, float 0.000000e+00, float 1.000000e+00) %276 = fadd float %275, 0xBEB0C6F7A0000000 %277 = call float @fabs(float %275) %278 = call float @llvm.pow.f32(float %277, float %35) %279 = fadd float %35, 8.000000e+00 %280 = fmul float %279, %278 %281 = fmul float %280, 0x3FA45F3060000000 %282 = fmul float %268, %240 %283 = fmul float %268, %242 %284 = fmul float %268, %244 %285 = call float @llvm.AMDGPU.cndlt(float %269, float 0.000000e+00, float %282) %286 = call float @llvm.AMDGPU.cndlt(float %269, float 0.000000e+00, float %283) %287 = call float @llvm.AMDGPU.cndlt(float %269, float 0.000000e+00, float %284) %288 = fmul float %258, %281 %289 = fmul float %260, %281 %290 = fmul float %262, %281 %291 = call float @llvm.AMDGPU.cndlt(float %276, float 0.000000e+00, float %288) %292 = call float @llvm.AMDGPU.cndlt(float %276, float 0.000000e+00, float %289) %293 = call float @llvm.AMDGPU.cndlt(float %276, float 0.000000e+00, float %290) %294 = call float @llvm.AMDGPU.cndlt(float %246, float 0.000000e+00, float %248) %295 = fadd float %291, %285 %296 = fadd float %292, %286 %297 = fadd float %293, %287 %298 = fmul float %294, %295 %299 = fmul float %294, %296 %300 = fmul float %294, %297 %301 = fmul float %temp12.0, %298 %302 = fmul float %temp13.0, %299 %303 = fmul float %temp14.0, %300 %304 = fmul float %301, %36 %305 = fmul float %302, %37 %306 = fmul float %303, %38 %307 = fmul float %183, %304 %308 = fmul float %183, %305 %309 = fmul float %183, %306 %310 = fsub float -0.000000e+00, %189 %311 = call float @llvm.AMDGPU.cndlt(float %310, float %307, float 0.000000e+00) %312 = fsub float -0.000000e+00, %189 %313 = call float @llvm.AMDGPU.cndlt(float %312, float %308, float 0.000000e+00) %314 = fsub float -0.000000e+00, %189 %315 = call float @llvm.AMDGPU.cndlt(float %314, float %309, float 0.000000e+00) %316 = call i32 @llvm.SI.packf16(float %311, float %313) %317 = bitcast i32 %316 to float %318 = call i32 @llvm.SI.packf16(float %315, float 0.000000e+00) %319 = bitcast i32 %318 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %317, float %319, float %317, float %319) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_movk_i32 s0, 0xf00 ; B0000F00 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v10, 0xb58637bd ; 7E1402FF B58637BD v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100 v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s44, s[16:19], s0 ; C2161000 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_buffer_load_dword s13, s[16:19], 0x0 ; C2069100 s_buffer_load_dword s12, s[16:19], 0x1 ; C2061101 s_buffer_load_dword s11, s[16:19], 0x2 ; C2059102 s_buffer_load_dword s1, s[16:19], 0x10 ; C2009110 s_buffer_load_dword s2, s[16:19], 0x11 ; C2011111 s_buffer_load_dword s3, s[16:19], 0x12 ; C2019112 s_buffer_load_dword s45, s[16:19], 0x13 ; C2169113 s_buffer_load_dword s8, s[16:19], 0x14 ; C2041114 s_buffer_load_dword s9, s[16:19], 0x15 ; C2049115 s_buffer_load_dword s10, s[16:19], 0x16 ; C2051116 s_buffer_load_dword s46, s[16:19], 0x17 ; C2171117 s_buffer_load_dword s0, s[16:19], 0x1a ; C200111A v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v2, v0, 2, 2, [m0] ; C8080A00 v_interp_p2_f32 v2, [v2], v1, 2, 2, [m0] ; C8090A01 v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00 v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01 s_buffer_load_dword s14, s[16:19], 0x25 ; C2071125 v_interp_p1_f32 v17, v0, 1, 3, [m0] ; C8440D00 v_interp_p2_f32 v17, [v17], v1, 1, 3, [m0] ; C8450D01 v_interp_p1_f32 v18, v0, 2, 3, [m0] ; C8480E00 v_interp_p2_f32 v18, [v18], v1, 2, 3, [m0] ; C8490E01 v_interp_p1_f32 v7, v0, 0, 4, [m0] ; C81C1000 v_interp_p2_f32 v7, [v7], v1, 0, 4, [m0] ; C81D1001 v_interp_p1_f32 v6, v0, 1, 4, [m0] ; C8181100 v_interp_p2_f32 v6, [v6], v1, 1, 4, [m0] ; C8191101 v_interp_p1_f32 v5, v0, 2, 4, [m0] ; C8141200 v_interp_p2_f32 v5, [v5], v1, 2, 4, [m0] ; C8151201 s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 s_load_dwordx8 s[28:35], s[6:7], 0x20 ; C0CE0720 s_buffer_load_dword s15, s[16:19], 0x20 ; C2079120 s_buffer_load_dword s47, s[16:19], 0x21 ; C2179121 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[19:21], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[36:43], s[20:23] ; F0800700 00A91303 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[28:35], s[24:27] ; F0800100 00C70B0B s_buffer_load_dword s20, s[16:19], 0x22 ; C20A1122 s_buffer_load_dword s21, s[16:19], 0x24 ; C20A9124 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v11, s15, v11 ; 0616160F v_mul_f32_e32 v11, s47, v11 ; 1016162F v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_and_b32_e32 v12, 0x7fffffff, v11 ; 361816FF 7FFFFFFF v_log_f32_e32 v12, v12 ; 7E184F0C v_add_f32_e32 v10, v11, v10 ; 0614150B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v11, s20, v12 ; 0E161814 v_exp_f32_e32 v11, v11 ; 7E164B0B v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v10, v11, 0, vcc ; D200000A 01A9010B v_mul_f32_e32 v11, v13, v13 ; 10161B0D v_mad_f32 v11, v17, v17, v11 ; D282000B 042E2311 v_mad_f32 v12, v18, v18, v11 ; D282000C 042E2512 s_buffer_load_dword s15, s[16:19], 0x26 ; C2079126 s_buffer_load_dword s20, s[16:19], 0x28 ; C20A1128 s_buffer_load_dword s22, s[16:19], 0x29 ; C20B1129 v_rsq_clamp_f32_e64 v11, |v12| ; D358010B 0000010C v_mad_f32 v14, 2.0, v19, -1.0 ; D282000E 03CE26F4 v_mad_f32 v15, 2.0, v20, -1.0 ; D282000F 03CE28F4 v_mad_f32 v16, 2.0, v21, -1.0 ; D2820010 03CE2AF4 v_min_f32_e32 v11, 0x7f7fffff, v11 ; 1E1616FF 7F7FFFFF v_mul_f32_e32 v13, v13, v11 ; 101A170D v_mul_f32_e32 v13, s21, v13 ; 101A1A15 v_mul_f32_e32 v17, v17, v11 ; 10221711 v_mad_f32 v13, -s14, v17, -v13 ; D282000D A436220E v_mul_f32_e32 v11, v18, v11 ; 10161712 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, -s15, v11, v13 ; D282000B 2436160F v_subrev_f32_e32 v11, s20, v11 ; 0A161614 v_mul_f32_e32 v11, s22, v11 ; 10161616 v_mul_f32_e32 v13, v7, v7 ; 101A0F07 v_mad_f32 v13, v6, v6, v13 ; D282000D 04360D06 v_mad_f32 v13, v5, v5, v13 ; D282000D 04360B05 v_rsq_clamp_f32_e32 v21, v13 ; 7E2A590D v_mul_f32_e32 v13, v8, v8 ; 101A1108 v_mad_f32 v13, v9, v9, v13 ; D282000D 04361309 v_mad_f32 v13, v2, v2, v13 ; D282000D 04360502 v_rsq_clamp_f32_e32 v22, v13 ; 7E2C590D v_mul_f32_e32 v13, v14, v14 ; 101A1D0E v_mad_f32 v13, v15, v15, v13 ; D282000D 04361F0F v_mad_f32 v13, v16, v16, v13 ; D282000D 04362110 v_rsq_clamp_f32_e32 v23, v13 ; 7E2E590D v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2 s_buffer_load_dword s21, s[16:19], 0x1f ; C20A911F s_buffer_load_dword s20, s[16:19], 0x1c ; C20A111C s_buffer_load_dword s15, s[16:19], 0x1d ; C207911D s_buffer_load_dword s14, s[16:19], 0x1e ; C207111E v_mul_f32_e32 v12, v13, v10 ; 1018150D v_mul_f32_e32 v12, v12, v11 ; 1018170C v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v17, 0, -1.0, vcc ; D2000011 01A9E680 v_cndmask_b32_e64 v17, v17, -1.0, vcc ; D2000011 01A9E711 v_cndmask_b32_e64 v17, v17, -1.0, vcc ; D2000011 01A9E711 v_cndmask_b32_e64 v17, v17, -1.0, vcc ; D2000011 01A9E711 v_cmpx_le_f32_e32 vcc, 0, v17 ; 7C262280 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v17, s45 ; 7E22022D v_mov_b32_e32 v20, s46 ; 7E28022E v_cmp_ne_i32_e64 s[22:23], 0, s44 ; D10A0016 00005880 v_mov_b32_e32 v18, v10 ; 7E24030A v_mov_b32_e32 v19, v10 ; 7E26030A s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 v_interp_p1_f32 v18, v0, 3, 5, [m0] ; C8481700 s_buffer_load_dword s36, s[16:19], 0x6 ; C2121106 s_buffer_load_dword s37, s[16:19], 0x7 ; C2129107 s_buffer_load_dword s38, s[16:19], 0x4 ; C2131104 s_buffer_load_dword s39, s[16:19], 0x5 ; C2139105 v_interp_p2_f32 v18, [v18], v1, 3, 5, [m0] ; C8491701 v_interp_p1_f32 v19, v0, 1, 5, [m0] ; C84C1500 v_rcp_f32_e32 v18, v18 ; 7E245512 v_interp_p2_f32 v19, [v19], v1, 1, 5, [m0] ; C84D1501 v_interp_p1_f32 v0, v0, 0, 5, [m0] ; C8001400 v_interp_p2_f32 v0, [v0], v1, 0, 5, [m0] ; C8011401 v_mul_f32_e32 v0, v0, v18 ; 10002500 v_mul_f32_e32 v1, v19, v18 ; 10022513 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v18, s37 ; 7E240225 v_mad_f32 v18, s38, v0, v18 ; D2820012 044A0026 v_mov_b32_e32 v0, s36 ; 7E000224 v_mad_f32 v19, s39, v1, v0 ; D2820013 04020227 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[24:31], s[32:35] ; F0800700 01061812 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, v24, v10 ; 10241518 v_mul_f32_e32 v19, v25, v10 ; 10261519 v_mul_f32_e32 v10, v26, v10 ; 1014151A s_or_b64 exec, exec, s[22:23] ; 88FE167E v_min_f32_e32 v0, 0x7f7fffff, v21 ; 1E002AFF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v22 ; 1E022CFF 7F7FFFFF v_mul_f32_e32 v8, v1, v8 ; 10101101 v_mul_f32_e32 v9, v1, v9 ; 10121301 v_min_f32_e32 v21, 0x7f7fffff, v23 ; 1E2A2EFF 7F7FFFFF s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 v_sub_f32_e64 v22, 1.0, s13 ; D2080016 00001AF2 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[28:35], s[16:19] ; F0800700 00871703 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v22, v23, v22 ; 102C2D17 v_sub_f32_e64 v23, 1.0, s12 ; D2080017 000018F2 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_sub_f32_e64 v24, 1.0, s11 ; D2080018 000016F2 v_mul_f32_e32 v24, v25, v24 ; 10303119 image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[36:43], s[24:27] ; F0800700 00C91903 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v20, v25, s8 ; D2820003 00223314 v_mad_f32 v4, v26, v20, s9 ; D2820004 0026291A v_mad_f32 v20, v27, v20, s10 ; D2820014 002A291B v_mul_f32_e32 v14, v21, v14 ; 101C1D15 v_mul_f32_e32 v15, v21, v15 ; 101E1F15 v_mul_f32_e32 v16, v21, v16 ; 10202115 v_mul_f32_e32 v21, v0, v7 ; 102A0F00 v_mul_f32_e32 v21, v21, v14 ; 102A1D15 v_mul_f32_e32 v25, v0, v6 ; 10320D00 v_mad_f32 v21, v15, v25, v21 ; D2820015 0456330F v_mul_f32_e32 v25, v0, v5 ; 10320B00 v_mad_f32 v21, v16, v25, v21 ; D2820015 04563310 v_mul_f32_e32 v25, v14, v21 ; 10322B0E v_mad_f32 v25, v21, v14, v25 ; D2820019 04661D15 v_mad_f32 v7, -v7, v0, v25 ; D2820007 24660107 v_mul_f32_e32 v25, v15, v21 ; 10322B0F v_mad_f32 v25, v21, v15, v25 ; D2820019 04661F15 v_mad_f32 v6, -v6, v0, v25 ; D2820006 24660106 v_mul_f32_e32 v25, v16, v21 ; 10322B10 v_mad_f32 v21, v21, v16, v25 ; D2820015 04662115 v_mad_f32 v0, -v5, v0, v21 ; D2820000 24560105 v_mad_f32 v5, v17, v22, s1 ; D2820005 00062D11 v_mad_f32 v21, v23, v17, s2 ; D2820015 000A2317 v_mad_f32 v17, v24, v17, s3 ; D2820011 000E2318 v_mul_f32_e32 v14, v8, v14 ; 101C1D08 v_mad_f32 v14, v15, v9, v14 ; D282000E 043A130F v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mad_f32 v6, v6, v9, v7 ; D2820006 041E1306 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mad_f32 v2, v16, v1, v14 ; D2820002 043A0310 v_mad_f32 v0, v0, v1, v6 ; D2820000 041A0300 v_add_f32_e64 v1, 0, v2 clamp ; D2060801 00020480 v_mul_f32_e32 v2, v5, v1 ; 10040305 v_mul_f32_e32 v5, v21, v1 ; 100A0315 v_mul_f32_e32 v6, v17, v1 ; 100C0311 v_mov_b32_e32 v7, 0xb58637bd ; 7E0E02FF B58637BD v_add_f32_e32 v1, v7, v1 ; 06020307 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v2, 0, vcc ; D2000001 01A90102 v_cndmask_b32_e64 v2, v5, 0, vcc ; D2000002 01A90105 v_cndmask_b32_e64 v5, v6, 0, vcc ; D2000005 01A90106 v_max_f32_e32 v6, 0, v13 ; 200C1A80 v_mov_b32_e32 v8, 0x7fffffff ; 7E1002FF 7FFFFFFF v_and_b32_e32 v9, v6, v8 ; 36121106 v_log_f32_e32 v9, v9 ; 7E124F09 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v8, v0, v8 ; 36101100 v_log_f32_e32 v8, v8 ; 7E104F08 v_mul_legacy_f32_e32 v9, s21, v9 ; 0E121215 v_add_f32_e32 v6, v7, v6 ; 060C0D07 v_add_f32_e32 v0, v7, v0 ; 06000107 v_mul_legacy_f32_e32 v7, s0, v8 ; 0E0E1000 v_mov_b32_e32 v8, 0x41000000 ; 7E1002FF 41000000 v_add_f32_e32 v8, s0, v8 ; 06101000 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_exp_f32_e32 v8, v9 ; 7E104B09 v_mul_f32_e32 v7, 0x3d22f983, v7 ; 100E0EFF 3D22F983 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v7, v7, v20 ; 100E2907 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v3, 0, vcc ; D2000000 01A90103 v_cndmask_b32_e64 v3, v4, 0, vcc ; D2000003 01A90104 v_cndmask_b32_e64 v4, v7, 0, vcc ; D2000004 01A90107 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v6, v8, 0, vcc ; D2000006 01A90108 v_add_f32_e32 v0, v1, v0 ; 06000101 v_add_f32_e32 v1, v2, v3 ; 06020702 v_add_f32_e32 v2, v5, v4 ; 06040905 v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mul_f32_e32 v2, v2, v6 ; 10040D02 v_mul_f32_e32 v0, v0, v18 ; 10002500 v_mul_f32_e32 v1, v1, v19 ; 10022701 v_mul_f32_e32 v2, v2, v10 ; 10041502 v_mul_f32_e32 v0, s20, v0 ; 10000014 v_mul_f32_e32 v1, s15, v1 ; 1002020F v_mul_f32_e32 v2, s14, v2 ; 1004040E v_mul_f32_e32 v0, v0, v11 ; 10001700 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_xor_b32_e32 v3, 0x80000000, v12 ; 3A0618FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cndmask_b32_e64 v1, 0, v2, vcc ; D2000001 01AA0480 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 28 Code Size: 1364 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[4], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..6] IMM[0] FLT32 { 2.0000, -1.0000, -0.0000, 0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[2], IN[2] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[2], TEMP[0].xxxx 8: TEX TEMP[3], IN[1], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: TEX TEMP[3], IN[0], SAMP[4], 2D 18: ADD TEMP[1].w, TEMP[3].xxxx, CONST[13].xxxx 19: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[13].yyyy 20: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 21: POW TEMP[3].x, |TEMP[1].wwww|, CONST[13].zzzz 22: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3].xxxx 23: UIF CONST[240].xxxx :45 24: MUL TEMP[5].xyz, CONST[7].xyww, IN[4].yyyy 25: MAD TEMP[5].xyz, CONST[6].xyww, IN[4].xxxx, TEMP[5] 26: MAD TEMP[5].xyz, CONST[8].xyww, IN[4].zzzz, TEMP[5] 27: MAD TEMP[5].xyz, CONST[9].xyww, IN[4].wwww, TEMP[5] 28: RCP TEMP[1].w, TEMP[5].zzzz 29: MUL TEMP[5].xy, TEMP[1].wwww, TEMP[5] 30: MAD TEMP[5].xy, TEMP[5], CONST[1], CONST[1].wzzw 31: TEX TEMP[5], TEMP[5], SAMP[0], 2D 32: UIF CONST[240].yyyy :0 33: DP3 TEMP[1].w, IN[4], IN[4] 34: RSQ TEMP[0], |TEMP[1].wwww| 35: MIN TEMP[1].w, IMM[1].wwww, TEMP[0] 36: RCP TEMP[1].w, TEMP[1].wwww 37: ADD TEMP[1].w, -TEMP[1].wwww, CONST[0].xxxx 38: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[0].yyyy 39: MUL TEMP[1].w, TEMP[1].wwww, TEMP[1].wwww 40: LRP TEMP[6].xyz, TEMP[1].wwww, TEMP[5].wwww, TEMP[3].zzzz 41: ELSE :43 42: MOV TEMP[6].xyz, TEMP[3].zzzz 43: ENDIF 44: MUL TEMP[3].xyz, TEMP[5], TEMP[6] 45: ENDIF 46: MOV TEMP[5].y, IMM[0].yyyy 47: ADD TEMP[5].xyz, -TEMP[5].yyyy, -CONST[10] 48: TEX TEMP[6], IN[1], SAMP[2], 2D 49: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 50: MAD TEMP[5].xyz, TEMP[5], CONST[4].wwww, CONST[4] 51: TEX TEMP[6], IN[1], SAMP[3], 2D 52: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 53: DP3_SAT TEMP[1].w, TEMP[4], TEMP[2] 54: ADD TEMP[2].w, TEMP[1].wwww, IMM[0].zzzz 55: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 56: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].zzzz 57: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[11].zzzz 58: MOV TEMP[1].z, CONST[11].zzzz 59: ADD TEMP[1].x, TEMP[1].zzzz, IMM[1].xxxx 60: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 61: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 62: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[5] 63: CMP TEMP[2].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[2] 64: MUL TEMP[1].xzw, TEMP[6].xyyz, TEMP[1].xxxx 65: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[0].wwww, TEMP[1].xzww 66: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 67: MUL TEMP[1].xyz, TEMP[3], TEMP[1] 68: MUL OUT[0].xyz, TEMP[1], CONST[12] 69: MOV OUT[0].w, IMM[0].wwww 70: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %45 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %46 = bitcast <8 x i32> addrspace(2)* %45 to <32 x i8> addrspace(2)* %47 = load <32 x i8>, <32 x i8> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %49 = bitcast <4 x i32> addrspace(2)* %48 to <16 x i8> addrspace(2)* %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %52 = bitcast <8 x i32> addrspace(2)* %51 to <32 x i8> addrspace(2)* %53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %55 = bitcast <4 x i32> addrspace(2)* %54 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %83 = fmul float %76, %76 %84 = fmul float %77, %77 %85 = fadd float %84, %83 %86 = fmul float %78, %78 %87 = fadd float %85, %86 %88 = call float @llvm.AMDGPU.rsq.clamped.f32(float %87) %89 = call float @llvm.minnum.f32(float %88, float 0x47EFFFFFE0000000) %90 = fmul float %76, %89 %91 = fmul float %77, %89 %92 = fmul float %78, %89 %93 = fmul float %73, %73 %94 = fmul float %74, %74 %95 = fadd float %94, %93 %96 = fmul float %75, %75 %97 = fadd float %95, %96 %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97) %99 = call float @llvm.minnum.f32(float %98, float 0x47EFFFFFE0000000) %100 = fmul float %73, %99 %101 = fmul float %74, %99 %102 = fmul float %75, %99 %103 = bitcast float %71 to i32 %104 = bitcast float %72 to i32 %105 = insertelement <2 x i32> undef, i32 %103, i32 0 %106 = insertelement <2 x i32> %105, i32 %104, i32 1 %107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %106, <32 x i8> %47, <16 x i8> %50, i32 2) %108 = extractelement <4 x float> %107, i32 0 %109 = extractelement <4 x float> %107, i32 1 %110 = extractelement <4 x float> %107, i32 2 %111 = fmul float %108, 2.000000e+00 %112 = fadd float %111, -1.000000e+00 %113 = fmul float %109, 2.000000e+00 %114 = fadd float %113, -1.000000e+00 %115 = fmul float %110, 2.000000e+00 %116 = fadd float %115, -1.000000e+00 %117 = fmul float %112, %112 %118 = fmul float %114, %114 %119 = fadd float %118, %117 %120 = fmul float %116, %116 %121 = fadd float %119, %120 %122 = call float @llvm.AMDGPU.rsq.clamped.f32(float %121) %123 = call float @llvm.minnum.f32(float %122, float 0x47EFFFFFE0000000) %124 = fmul float %112, %123 %125 = fmul float %114, %123 %126 = fmul float %116, %123 %127 = fmul float %124, %90 %128 = fmul float %125, %91 %129 = fadd float %128, %127 %130 = fmul float %126, %92 %131 = fadd float %129, %130 %132 = fmul float %131, %124 %133 = fmul float %131, %125 %134 = fmul float %131, %126 %135 = fmul float %132, 2.000000e+00 %136 = fsub float %135, %90 %137 = fmul float %133, 2.000000e+00 %138 = fsub float %137, %91 %139 = fmul float %134, 2.000000e+00 %140 = fsub float %139, %92 %141 = bitcast float %69 to i32 %142 = bitcast float %70 to i32 %143 = insertelement <2 x i32> undef, i32 %141, i32 0 %144 = insertelement <2 x i32> %143, i32 %142, i32 1 %145 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %144, <32 x i8> %65, <16 x i8> %68, i32 2) %146 = extractelement <4 x float> %145, i32 0 %147 = fadd float %146, %41 %148 = fmul float %147, %42 %149 = call float @llvm.AMDIL.clamp.(float %148, float 0.000000e+00, float 1.000000e+00) %150 = fadd float %149, 0xBEB0C6F7A0000000 %151 = call float @fabs(float %149) %152 = call float @llvm.pow.f32(float %151, float %43) %153 = call float @llvm.AMDGPU.cndlt(float %150, float 0.000000e+00, float %152) %154 = call float @llvm.AMDGPU.cndlt(float %150, float 0.000000e+00, float %152) %155 = call float @llvm.AMDGPU.cndlt(float %150, float 0.000000e+00, float %152) %156 = bitcast float %44 to i32 %157 = icmp eq i32 %156, 0 br i1 %157, label %ENDIF, label %IF IF: ; preds = %main_body %158 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %159 = load <16 x i8>, <16 x i8> addrspace(2)* %158, align 16, !tbaa !0 %160 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %161 = load <32 x i8>, <32 x i8> addrspace(2)* %160, align 32, !tbaa !0 %162 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3844) %163 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %164 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %165 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %166 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %167 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %168 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %169 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %170 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %171 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %172 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %173 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %174 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %175 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %176 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %177 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %178 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %179 = fmul float %171, %80 %180 = fmul float %170, %80 %181 = fmul float %169, %80 %182 = fmul float %174, %79 %183 = fadd float %182, %179 %184 = fmul float %173, %79 %185 = fadd float %184, %180 %186 = fmul float %172, %79 %187 = fadd float %186, %181 %188 = fmul float %168, %81 %189 = fadd float %188, %183 %190 = fmul float %167, %81 %191 = fadd float %190, %185 %192 = fmul float %166, %81 %193 = fadd float %192, %187 %194 = fmul float %165, %82 %195 = fadd float %194, %189 %196 = fmul float %164, %82 %197 = fadd float %196, %191 %198 = fmul float %163, %82 %199 = fadd float %198, %193 %200 = fdiv float 1.000000e+00, %199 %201 = fmul float %200, %195 %202 = fmul float %200, %197 %203 = fmul float %201, %178 %204 = fadd float %203, %175 %205 = fmul float %202, %177 %206 = fadd float %205, %176 %207 = bitcast float %204 to i32 %208 = bitcast float %206 to i32 %209 = insertelement <2 x i32> undef, i32 %207, i32 0 %210 = insertelement <2 x i32> %209, i32 %208, i32 1 %211 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %210, <32 x i8> %161, <16 x i8> %159, i32 2) %212 = extractelement <4 x float> %211, i32 0 %213 = extractelement <4 x float> %211, i32 1 %214 = extractelement <4 x float> %211, i32 2 %215 = extractelement <4 x float> %211, i32 3 %216 = bitcast float %162 to i32 %217 = icmp eq i32 %216, 0 br i1 %217, label %ENDIF28, label %IF29 ENDIF: ; preds = %main_body, %ENDIF28 %temp12.0 = phi float [ %312, %ENDIF28 ], [ %153, %main_body ] %temp13.0 = phi float [ %313, %ENDIF28 ], [ %154, %main_body ] %temp14.0 = phi float [ %314, %ENDIF28 ], [ %155, %main_body ] %218 = fsub float 1.000000e+00, %34 %219 = fsub float 1.000000e+00, %35 %220 = fsub float 1.000000e+00, %36 %221 = bitcast float %71 to i32 %222 = bitcast float %72 to i32 %223 = insertelement <2 x i32> undef, i32 %221, i32 0 %224 = insertelement <2 x i32> %223, i32 %222, i32 1 %225 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %224, <32 x i8> %53, <16 x i8> %56, i32 2) %226 = extractelement <4 x float> %225, i32 0 %227 = extractelement <4 x float> %225, i32 1 %228 = extractelement <4 x float> %225, i32 2 %229 = fmul float %218, %226 %230 = fmul float %219, %227 %231 = fmul float %220, %228 %232 = fmul float %229, %29 %233 = fadd float %232, %26 %234 = fmul float %230, %29 %235 = fadd float %234, %27 %236 = fmul float %231, %29 %237 = fadd float %236, %28 %238 = bitcast float %71 to i32 %239 = bitcast float %72 to i32 %240 = insertelement <2 x i32> undef, i32 %238, i32 0 %241 = insertelement <2 x i32> %240, i32 %239, i32 1 %242 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %241, <32 x i8> %59, <16 x i8> %62, i32 2) %243 = extractelement <4 x float> %242, i32 0 %244 = extractelement <4 x float> %242, i32 1 %245 = extractelement <4 x float> %242, i32 2 %246 = fmul float %243, %33 %247 = fadd float %246, %30 %248 = fmul float %244, %33 %249 = fadd float %248, %31 %250 = fmul float %245, %33 %251 = fadd float %250, %32 %252 = fmul float %124, %100 %253 = fmul float %125, %101 %254 = fadd float %253, %252 %255 = fmul float %126, %102 %256 = fadd float %254, %255 %257 = call float @llvm.AMDIL.clamp.(float %256, float 0.000000e+00, float 1.000000e+00) %258 = fadd float %257, 0xBEB0C6F7A0000000 %259 = fmul float %136, %100 %260 = fmul float %138, %101 %261 = fadd float %260, %259 %262 = fmul float %140, %102 %263 = fadd float %261, %262 %264 = call float @llvm.AMDIL.clamp.(float %263, float 0.000000e+00, float 1.000000e+00) %265 = fadd float %264, 0xBEB0C6F7A0000000 %266 = call float @fabs(float %264) %267 = call float @llvm.pow.f32(float %266, float %37) %268 = fadd float %37, 8.000000e+00 %269 = fmul float %268, %267 %270 = fmul float %269, 0x3FA45F3060000000 %271 = fmul float %257, %233 %272 = fmul float %257, %235 %273 = fmul float %257, %237 %274 = call float @llvm.AMDGPU.cndlt(float %258, float 0.000000e+00, float %271) %275 = call float @llvm.AMDGPU.cndlt(float %258, float 0.000000e+00, float %272) %276 = call float @llvm.AMDGPU.cndlt(float %258, float 0.000000e+00, float %273) %277 = fmul float %247, %270 %278 = fmul float %249, %270 %279 = fmul float %251, %270 %280 = call float @llvm.AMDGPU.cndlt(float %265, float 0.000000e+00, float %277) %281 = call float @llvm.AMDGPU.cndlt(float %265, float 0.000000e+00, float %278) %282 = call float @llvm.AMDGPU.cndlt(float %265, float 0.000000e+00, float %279) %283 = fadd float %280, %274 %284 = fadd float %281, %275 %285 = fadd float %282, %276 %286 = fmul float %temp12.0, %283 %287 = fmul float %temp13.0, %284 %288 = fmul float %temp14.0, %285 %289 = fmul float %286, %38 %290 = fmul float %287, %39 %291 = fmul float %288, %40 %292 = call i32 @llvm.SI.packf16(float %289, float %290) %293 = bitcast i32 %292 to float %294 = call i32 @llvm.SI.packf16(float %291, float 0.000000e+00) %295 = bitcast i32 %294 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %293, float %295, float %293, float %295) ret void IF29: ; preds = %IF %296 = fmul float %79, %79 %297 = fmul float %80, %80 %298 = fadd float %297, %296 %299 = fmul float %81, %81 %300 = fadd float %298, %299 %301 = call float @fabs(float %300) %302 = call float @llvm.AMDGPU.rsq.clamped.f32(float %301) %303 = call float @llvm.minnum.f32(float %302, float 0x47EFFFFFE0000000) %304 = fdiv float 1.000000e+00, %303 %305 = fsub float %24, %304 %306 = fmul float %305, %25 %307 = call float @llvm.AMDIL.clamp.(float %306, float 0.000000e+00, float 1.000000e+00) %308 = fmul float %307, %307 %309 = call float @llvm.AMDGPU.lrp(float %308, float %215, float %155) %310 = call float @llvm.AMDGPU.lrp(float %308, float %215, float %155) %311 = call float @llvm.AMDGPU.lrp(float %308, float %215, float %155) br label %ENDIF28 ENDIF28: ; preds = %IF, %IF29 %temp24.0 = phi float [ %309, %IF29 ], [ %155, %IF ] %temp25.0 = phi float [ %310, %IF29 ], [ %155, %IF ] %temp26.0 = phi float [ %311, %IF29 ], [ %155, %IF ] %312 = fmul float %212, %temp24.0 %313 = fmul float %213, %temp25.0 %314 = fmul float %214, %temp26.0 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_movk_i32 s0, 0xf00 ; B0000F00 v_mov_b32_e32 v10, 0xb58637bd ; 7E1402FF B58637BD s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100 v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v4, v0, 0, 2, [m0] ; C8100800 v_interp_p2_f32 v4, [v4], v1, 0, 2, [m0] ; C8110801 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[8:11], s0 ; C2010800 s_buffer_load_dword s0, s[8:11], 0x13 ; C2000913 s_buffer_load_dword s1, s[8:11], 0x17 ; C2008917 s_buffer_load_dword s3, s[8:11], 0x34 ; C2018934 s_buffer_load_dword s12, s[8:11], 0x35 ; C2060935 s_buffer_load_dword s13, s[8:11], 0x36 ; C2068936 v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 v_interp_p1_f32 v6, v0, 2, 2, [m0] ; C8180A00 v_interp_p2_f32 v6, [v6], v1, 2, 2, [m0] ; C8190A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v8, v0, 1, 3, [m0] ; C8200D00 v_interp_p2_f32 v8, [v8], v1, 1, 3, [m0] ; C8210D01 v_interp_p1_f32 v7, v0, 2, 3, [m0] ; C81C0E00 v_interp_p2_f32 v7, [v7], v1, 2, 3, [m0] ; C81D0E01 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx4 s[20:23], s[4:5], 0x10 ; C08A0510 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[16:19] ; F0800700 00860F02 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[32:39], s[20:23] ; F0800100 00A80B0B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v11, s3, v11 ; 06161603 v_mul_f32_e32 v11, s12, v11 ; 1016160C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_and_b32_e32 v12, 0x7fffffff, v11 ; 361816FF 7FFFFFFF v_log_f32_e32 v18, v12 ; 7E244F0C v_mad_f32 v14, 2.0, v15, -1.0 ; D282000E 03CE1EF4 v_mad_f32 v13, 2.0, v16, -1.0 ; D282000D 03CE20F4 v_mad_f32 v12, 2.0, v17, -1.0 ; D282000C 03CE22F4 v_add_f32_e32 v10, v11, v10 ; 0614150B v_mul_legacy_f32_e32 v16, s13, v18 ; 0E20240D v_mul_f32_e32 v11, v9, v9 ; 10161309 v_mad_f32 v11, v8, v8, v11 ; D282000B 042E1108 v_mad_f32 v11, v7, v7, v11 ; D282000B 042E0F07 v_rsq_clamp_f32_e32 v15, v11 ; 7E1E590B v_mul_f32_e32 v11, v4, v4 ; 10160904 v_mad_f32 v11, v5, v5, v11 ; D282000B 042E0B05 v_mad_f32 v11, v6, v6, v11 ; D282000B 042E0D06 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v17, v14, v14 ; 10221D0E v_mad_f32 v17, v13, v13, v17 ; D2820011 04461B0D v_mad_f32 v17, v12, v12, v17 ; D2820011 0446190C v_rsq_clamp_f32_e32 v18, v17 ; 7E245911 v_exp_f32_e32 v16, v16 ; 7E204B10 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v10, v16, 0, vcc ; D200000A 01A90110 v_cmp_ne_i32_e64 s[2:3], 0, s2 ; D10A0002 00000480 v_mov_b32_e32 v16, v10 ; 7E20030A v_mov_b32_e32 v17, v10 ; 7E22030A s_and_saveexec_b64 s[2:3], s[2:3] ; BE822402 s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E s_cbranch_execz BB0_4 ; BF880000 v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000 v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001 v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100 v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101 v_interp_p1_f32 v19, v0, 2, 4, [m0] ; C84C1200 v_interp_p2_f32 v19, [v19], v1, 2, 4, [m0] ; C84D1201 s_buffer_load_dword s12, s[8:11], 0x19 ; C2060919 s_buffer_load_dword s13, s[8:11], 0x1b ; C206891B s_buffer_load_dword s14, s[8:11], 0x1c ; C207091C s_buffer_load_dword s15, s[8:11], 0x1d ; C207891D s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904 s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905 s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906 s_buffer_load_dword s19, s[8:11], 0x7 ; C2098907 s_buffer_load_dword s20, s[8:11], 0x18 ; C20A0918 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 s_buffer_load_dword s21, s[8:11], 0x27 ; C20A8927 s_buffer_load_dword s22, s[8:11], 0x1f ; C20B091F s_buffer_load_dword s23, s[8:11], 0x20 ; C20B8920 s_buffer_load_dword s24, s[8:11], 0x21 ; C20C0921 s_buffer_load_dword s25, s[8:11], 0x23 ; C20C8923 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s14, v17 ; 1002220E v_mul_f32_e32 v20, s15, v17 ; 1028220F v_mad_f32 v1, s20, v16, v1 ; D2820001 04062014 v_mad_f32 v20, s12, v16, v20 ; D2820014 0452200C s_buffer_load_dword s12, s[8:11], 0x24 ; C2060924 s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925 v_mul_f32_e32 v21, s22, v17 ; 102A2216 v_mad_f32 v21, s13, v16, v21 ; D2820015 0456200D v_mad_f32 v1, s23, v19, v1 ; D2820001 04062617 v_mad_f32 v21, s25, v19, v21 ; D2820015 04562619 v_mad_f32 v21, s21, v0, v21 ; D2820015 04560015 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_movk_i32 s13, 0xf04 ; B00D0F04 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_buffer_load_dword s13, s[8:11], s13 ; C206880D v_rcp_f32_e32 v21, v21 ; 7E2A5515 v_mad_f32 v20, s24, v19, v20 ; D2820014 04522618 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s12, v0, v1 ; D2820001 0406000C v_mad_f32 v0, s14, v0, v20 ; D2820000 0452000E v_mul_f32_e32 v1, v1, v21 ; 10022B01 v_mul_f32_e32 v0, v0, v21 ; 10002B00 v_mov_b32_e32 v20, s19 ; 7E280213 v_mad_f32 v20, s16, v1, v20 ; D2820014 04520210 v_mov_b32_e32 v1, s18 ; 7E020212 v_mad_f32 v21, s17, v0, v1 ; D2820015 04060011 image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[28:35], s[20:23] ; F0800F00 00A71414 v_cmp_ne_i32_e64 s[12:13], 0, s13 ; D10A000C 00001A80 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_cbranch_execz BB0_5 ; BF880000 s_buffer_load_dword s14, s[8:11], 0x0 ; C2070900 s_buffer_load_dword s15, s[8:11], 0x1 ; C2078901 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v16, v16 ; 10002110 v_mad_f32 v0, v17, v17, v0 ; D2820000 04022311 v_mad_f32 v0, v19, v19, v0 ; D2820000 04022713 v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s14, v0 ; 0800000E v_mul_f32_e32 v0, s15, v0 ; 1000000F v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v1, v0, v0 ; 10020100 v_mad_f32 v0, -v0, v0, 1.0 ; D2820000 23CA0100 v_mul_f32_e32 v0, v10, v0 ; 1000010A v_mad_f32 v10, v1, v23, v0 ; D282000A 04022F01 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mul_f32_e32 v17, v10, v20 ; 1022290A v_mul_f32_e32 v16, v10, v21 ; 10202B0A v_mul_f32_e32 v10, v10, v22 ; 10142D0A s_or_b64 exec, exec, s[2:3] ; 88FE027E v_min_f32_e32 v0, 0x7f7fffff, v18 ; 1E0024FF 7F7FFFFF s_buffer_load_dword s18, s[8:11], 0x10 ; C2090910 s_buffer_load_dword s19, s[8:11], 0x11 ; C2098911 s_buffer_load_dword s13, s[8:11], 0x12 ; C2068912 s_buffer_load_dword s14, s[8:11], 0x14 ; C2070914 s_buffer_load_dword s15, s[8:11], 0x15 ; C2078915 s_buffer_load_dword s16, s[8:11], 0x16 ; C2080916 s_buffer_load_dword s20, s[8:11], 0x28 ; C20A0928 s_buffer_load_dword s21, s[8:11], 0x29 ; C20A8929 s_buffer_load_dword s17, s[8:11], 0x2a ; C208892A s_buffer_load_dword s12, s[8:11], 0x2e ; C206092E s_buffer_load_dword s2, s[8:11], 0x30 ; C2010930 s_buffer_load_dword s3, s[8:11], 0x31 ; C2018931 s_buffer_load_dword s8, s[8:11], 0x32 ; C2040932 v_mul_f32_e32 v14, v0, v14 ; 101C1D00 v_mul_f32_e32 v13, v0, v13 ; 101A1B00 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_min_f32_e32 v1, 0x7f7fffff, v15 ; 1E021EFF 7F7FFFFF v_mul_f32_e32 v12, v1, v9 ; 10181301 v_mul_f32_e32 v12, v12, v14 ; 10181D0C v_mul_f32_e32 v15, v1, v8 ; 101E1101 v_mad_f32 v12, v13, v15, v12 ; D282000C 04321F0D v_mul_f32_e32 v15, v1, v7 ; 101E0F01 v_mad_f32 v12, v0, v15, v12 ; D282000C 04321F00 v_mul_f32_e32 v15, v14, v12 ; 101E190E v_mad_f32 v15, v12, v14, v15 ; D282000F 043E1D0C v_mad_f32 v9, -v9, v1, v15 ; D2820009 243E0309 v_mul_f32_e32 v15, v13, v12 ; 101E190D s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C s_load_dwordx8 s[32:39], s[6:7], 0x10 ; C0D00710 s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718 v_mad_f32 v15, v12, v13, v15 ; D282000F 043E1B0C v_mad_f32 v8, -v8, v1, v15 ; D2820008 243E0308 v_mul_f32_e32 v15, v0, v12 ; 101E1900 v_mad_f32 v12, v12, v0, v15 ; D282000C 043E010C v_mad_f32 v1, -v7, v1, v12 ; D2820001 24320307 v_min_f32_e32 v12, 0x7f7fffff, v11 ; 1E1816FF 7F7FFFFF v_mul_f32_e32 v7, v12, v4 ; 100E090C v_mul_f32_e32 v11, v12, v5 ; 10160B0C v_mul_f32_e32 v4, v12, v6 ; 10080D0C v_mov_b32_e32 v5, s0 ; 7E0A0200 v_mov_b32_e32 v6, s1 ; 7E0C0201 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[24:27] ; F0800700 00C81202 image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[28:31] ; F0800700 00EA1502 v_sub_f32_e64 v2, 1.0, s20 ; D2080002 000028F2 v_sub_f32_e64 v3, 1.0, s21 ; D2080003 00002AF2 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v2, v18, v2 ; 10040512 v_mad_f32 v2, v5, v2, s18 ; D2820002 004A0505 v_mul_f32_e32 v3, v19, v3 ; 10060713 v_mad_f32 v3, v3, v5, s19 ; D2820003 004E0B03 v_mul_f32_e32 v12, v7, v14 ; 10181D07 v_mad_f32 v12, v13, v11, v12 ; D282000C 0432170D v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mad_f32 v7, v8, v11, v7 ; D2820007 041E1708 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v6, v21, s14 ; D2820008 003A2B06 v_mad_f32 v9, v22, v6, s15 ; D2820009 003E0D16 v_mad_f32 v6, v23, v6, s16 ; D2820006 00420D17 v_sub_f32_e64 v11, 1.0, s17 ; D208000B 000022F2 v_mul_f32_e32 v11, v20, v11 ; 10161714 v_mad_f32 v5, v11, v5, s13 ; D2820005 00360B0B v_mad_f32 v0, v0, v4, v12 ; D2820000 04320900 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mad_f32 v1, v1, v4, v7 ; D2820001 041E0901 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_and_b32_e32 v4, 0x7fffffff, v1 ; 360802FF 7FFFFFFF v_log_f32_e32 v4, v4 ; 7E084F04 v_mov_b32_e32 v7, 0xb58637bd ; 7E0E02FF B58637BD v_add_f32_e32 v11, v7, v0 ; 06160107 v_add_f32_e32 v1, v7, v1 ; 06020307 v_mul_legacy_f32_e32 v4, s12, v4 ; 0E08080C v_exp_f32_e32 v4, v4 ; 7E084B04 v_mov_b32_e32 v7, 0x41000000 ; 7E0E02FF 41000000 v_add_f32_e32 v7, s12, v7 ; 060E0E0C v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mul_f32_e32 v4, 0x3d22f983, v4 ; 100808FF 3D22F983 v_mul_f32_e32 v2, v2, v0 ; 10040102 v_mul_f32_e32 v3, v3, v0 ; 10060103 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v5, v4, v8 ; 100A1104 v_mul_f32_e32 v7, v4, v9 ; 100E1304 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v5, 0, vcc ; D2000001 01A90105 v_cndmask_b32_e64 v5, v7, 0, vcc ; D2000005 01A90107 v_cndmask_b32_e64 v4, v4, 0, vcc ; D2000004 01A90104 v_add_f32_e32 v1, v2, v1 ; 06020302 v_add_f32_e32 v2, v3, v5 ; 06040B03 v_add_f32_e32 v0, v0, v4 ; 06000900 v_mul_f32_e32 v1, v1, v17 ; 10022301 v_mul_f32_e32 v2, v2, v16 ; 10042102 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mul_f32_e32 v1, s2, v1 ; 10020202 v_mul_f32_e32 v2, s3, v2 ; 10040403 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 24 Code Size: 1320 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[1], PERSPECTIVE DCL IN[4], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL CONST[0..11] DCL TEMP[0..1] DCL TEMP[2], LOCAL DCL TEMP[3..5] IMM[0] FLT32 { 0.0000, 2.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.8165, 0.5774, 0.0000, 0.3333} IMM[2] FLT32 { -0.7071, -0.4082, 0.5774, 0.7071} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xyz, CONST[9], CONST[9].wwww 1: TEX TEMP[1], IN[3], SAMP[4], 2D 2: MUL TEMP[0].xyz, TEMP[0], TEMP[1] 3: DP3 TEMP[2].x, IN[1], IN[1] 4: RSQ TEMP[2].x, TEMP[2].xxxx 5: MIN TEMP[2].x, IMM[3].xxxx, TEMP[2].xxxx 6: MUL TEMP[1].xyz, IN[1], TEMP[2].xxxx 7: DP3 TEMP[2].x, IN[0], IN[0] 8: RSQ TEMP[2].x, TEMP[2].xxxx 9: MIN TEMP[2].x, IMM[3].xxxx, TEMP[2].xxxx 10: MUL TEMP[3].xyz, IN[0], TEMP[2].xxxx 11: MUL TEMP[4].xyz, TEMP[1].zxyw, TEMP[3].yzxw 12: MAD TEMP[4].xyz, TEMP[1].yzxw, TEMP[3].zxyw, -TEMP[4] 13: MUL TEMP[4].xyz, TEMP[4], IN[1].wwww 14: DP3 TEMP[2].x, IN[4], IN[4] 15: RSQ TEMP[2].x, TEMP[2].xxxx 16: MIN TEMP[2].x, IMM[3].xxxx, TEMP[2].xxxx 17: MUL TEMP[5].xyz, IN[4], TEMP[2].xxxx 18: MAD TEMP[5].xyz, TEMP[5].zzzz, IMM[0].xxyw, -TEMP[5] 19: DP3 TEMP[4].y, TEMP[4], TEMP[5] 20: DP3 TEMP[4].x, TEMP[3], TEMP[5] 21: DP3 TEMP[4].z, TEMP[1], TEMP[5] 22: TEX TEMP[1], TEMP[4], SAMP[2], CUBE 23: MUL TEMP[3].xyz, CONST[8], CONST[8].wwww 24: MUL TEMP[1].xyz, TEMP[1], TEMP[3] 25: TEX TEMP[3], IN[3], SAMP[3], 2D 26: MAD TEMP[0].xyz, TEMP[3].xxxx, TEMP[1], TEMP[0] 27: MOV TEMP[1].z, IMM[0].zzzz 28: ADD TEMP[1].xyz, TEMP[1].zzzz, -CONST[0] 29: MUL TEMP[0].xyz, TEMP[0], TEMP[1] 30: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 31: TEX TEMP[1], IN[2], SAMP[0], 2D 32: MUL TEMP[1].xyz, TEMP[1], CONST[6] 33: MUL TEMP[3].xyz, TEMP[0], TEMP[1] 34: DP2 TEMP[2].x, TEMP[5].yzzw, IMM[1] 35: ADD_SAT TEMP[4].x, IMM[1].zzzz, TEMP[2].xxxx 36: DP3_SAT TEMP[4].y, TEMP[5], IMM[2] 37: DP3_SAT TEMP[4].z, TEMP[5].yzxw, IMM[2].yzww 38: MAX TEMP[5].xyz, TEMP[4], IMM[0].wwww 39: MUL TEMP[4].xyz, TEMP[5], TEMP[5] 40: MUL TEMP[4].xyz, TEMP[4], TEMP[4] 41: MUL TEMP[4].xyz, TEMP[4], TEMP[4] 42: MUL TEMP[4].xyz, TEMP[4], TEMP[4] 43: TEX TEMP[5], IN[2], SAMP[1], 2D 44: MUL TEMP[5].xyz, TEMP[5], CONST[7] 45: DP3 TEMP[0].w, TEMP[5], TEMP[4] 46: DP3 TEMP[1].w, TEMP[5], IMM[1].wwww 47: TEX TEMP[4], IN[3], SAMP[5], 2D 48: MAD TEMP[4].xyz, TEMP[4], CONST[5].wwww, CONST[5] 49: MUL TEMP[1].xyz, TEMP[1], TEMP[4] 50: MUL TEMP[1].xyz, TEMP[0].wwww, TEMP[1] 51: MAD TEMP[1].xyz, TEMP[3], TEMP[1].wwww, TEMP[1] 52: ADD TEMP[1].xyz, TEMP[1], CONST[0] 53: MAD OUT[0].xyz, TEMP[0], CONST[11], TEMP[1] 54: TEX TEMP[0], IN[3], SAMP[6], 2D 55: MAX TEMP[1].x, TEMP[0].xxxx, CONST[10].yyyy 56: MIN_SAT OUT[0].w, CONST[10].xxxx, TEMP[1].xxxx 57: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %54 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %59 = bitcast <8 x i32> addrspace(2)* %58 to <32 x i8> addrspace(2)* %60 = load <32 x i8>, <32 x i8> addrspace(2)* %59, align 32, !tbaa !0 %61 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %62 = bitcast <4 x i32> addrspace(2)* %61 to <16 x i8> addrspace(2)* %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %65 = bitcast <8 x i32> addrspace(2)* %64 to <32 x i8> addrspace(2)* %66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, align 32, !tbaa !0 %67 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %68 = bitcast <4 x i32> addrspace(2)* %67 to <16 x i8> addrspace(2)* %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %71 = bitcast <8 x i32> addrspace(2)* %70 to <32 x i8> addrspace(2)* %72 = load <32 x i8>, <32 x i8> addrspace(2)* %71, align 32, !tbaa !0 %73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %74 = bitcast <4 x i32> addrspace(2)* %73 to <16 x i8> addrspace(2)* %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0 %76 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %77 = bitcast <8 x i32> addrspace(2)* %76 to <32 x i8> addrspace(2)* %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, align 32, !tbaa !0 %79 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %80 = bitcast <4 x i32> addrspace(2)* %79 to <16 x i8> addrspace(2)* %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %83 = bitcast <8 x i32> addrspace(2)* %82 to <32 x i8> addrspace(2)* %84 = load <32 x i8>, <32 x i8> addrspace(2)* %83, align 32, !tbaa !0 %85 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %86 = bitcast <4 x i32> addrspace(2)* %85 to <16 x i8> addrspace(2)* %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 %88 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %89 = bitcast <8 x i32> addrspace(2)* %88 to <32 x i8> addrspace(2)* %90 = load <32 x i8>, <32 x i8> addrspace(2)* %89, align 32, !tbaa !0 %91 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %92 = bitcast <4 x i32> addrspace(2)* %91 to <16 x i8> addrspace(2)* %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 %94 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %108 = fmul float %45, %48 %109 = fmul float %46, %48 %110 = fmul float %47, %48 %111 = bitcast float %103 to i32 %112 = bitcast float %104 to i32 %113 = insertelement <2 x i32> undef, i32 %111, i32 0 %114 = insertelement <2 x i32> %113, i32 %112, i32 1 %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %114, <32 x i8> %78, <16 x i8> %81, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = extractelement <4 x float> %115, i32 1 %118 = extractelement <4 x float> %115, i32 2 %119 = fmul float %108, %116 %120 = fmul float %109, %117 %121 = fmul float %110, %118 %122 = fmul float %97, %97 %123 = fmul float %98, %98 %124 = fadd float %123, %122 %125 = fmul float %99, %99 %126 = fadd float %124, %125 %127 = call float @llvm.AMDGPU.rsq.clamped.f32(float %126) %128 = call float @llvm.minnum.f32(float %127, float 0x47EFFFFFE0000000) %129 = fmul float %97, %128 %130 = fmul float %98, %128 %131 = fmul float %99, %128 %132 = fmul float %94, %94 %133 = fmul float %95, %95 %134 = fadd float %133, %132 %135 = fmul float %96, %96 %136 = fadd float %134, %135 %137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136) %138 = call float @llvm.minnum.f32(float %137, float 0x47EFFFFFE0000000) %139 = fmul float %94, %138 %140 = fmul float %95, %138 %141 = fmul float %96, %138 %142 = fmul float %131, %140 %143 = fmul float %129, %141 %144 = fmul float %130, %139 %145 = fmul float %130, %141 %146 = fsub float %145, %142 %147 = fmul float %131, %139 %148 = fsub float %147, %143 %149 = fmul float %129, %140 %150 = fsub float %149, %144 %151 = fmul float %146, %100 %152 = fmul float %148, %100 %153 = fmul float %150, %100 %154 = fmul float %105, %105 %155 = fmul float %106, %106 %156 = fadd float %155, %154 %157 = fmul float %107, %107 %158 = fadd float %156, %157 %159 = call float @llvm.AMDGPU.rsq.clamped.f32(float %158) %160 = call float @llvm.minnum.f32(float %159, float 0x47EFFFFFE0000000) %161 = fmul float %105, %160 %162 = fmul float %106, %160 %163 = fmul float %107, %160 %164 = fmul float %163, 0.000000e+00 %165 = fsub float %164, %161 %166 = fmul float %163, 0.000000e+00 %167 = fsub float %166, %162 %168 = fmul float %163, 2.000000e+00 %169 = fsub float %168, %163 %170 = fmul float %151, %165 %171 = fmul float %152, %167 %172 = fadd float %171, %170 %173 = fmul float %153, %169 %174 = fadd float %172, %173 %175 = fmul float %139, %165 %176 = fmul float %140, %167 %177 = fadd float %176, %175 %178 = fmul float %141, %169 %179 = fadd float %177, %178 %180 = fmul float %129, %165 %181 = fmul float %130, %167 %182 = fadd float %181, %180 %183 = fmul float %131, %169 %184 = fadd float %182, %183 %185 = insertelement <4 x float> undef, float %179, i32 0 %186 = insertelement <4 x float> %185, float %174, i32 1 %187 = insertelement <4 x float> %186, float %184, i32 2 %188 = insertelement <4 x float> %187, float 0.000000e+00, i32 3 %189 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %188) %190 = extractelement <4 x float> %189, i32 0 %191 = extractelement <4 x float> %189, i32 1 %192 = extractelement <4 x float> %189, i32 2 %193 = extractelement <4 x float> %189, i32 3 %194 = call float @fabs(float %192) %195 = fdiv float 1.000000e+00, %194 %196 = fmul float %190, %195 %197 = fadd float %196, 1.500000e+00 %198 = fmul float %191, %195 %199 = fadd float %198, 1.500000e+00 %200 = bitcast float %199 to i32 %201 = bitcast float %197 to i32 %202 = bitcast float %193 to i32 %203 = insertelement <4 x i32> undef, i32 %200, i32 0 %204 = insertelement <4 x i32> %203, i32 %201, i32 1 %205 = insertelement <4 x i32> %204, i32 %202, i32 2 %206 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %205, <32 x i8> %66, <16 x i8> %69, i32 4) %207 = extractelement <4 x float> %206, i32 0 %208 = extractelement <4 x float> %206, i32 1 %209 = extractelement <4 x float> %206, i32 2 %210 = fmul float %41, %44 %211 = fmul float %42, %44 %212 = fmul float %43, %44 %213 = fmul float %207, %210 %214 = fmul float %208, %211 %215 = fmul float %209, %212 %216 = bitcast float %103 to i32 %217 = bitcast float %104 to i32 %218 = insertelement <2 x i32> undef, i32 %216, i32 0 %219 = insertelement <2 x i32> %218, i32 %217, i32 1 %220 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %219, <32 x i8> %72, <16 x i8> %75, i32 2) %221 = extractelement <4 x float> %220, i32 0 %222 = fmul float %221, %213 %223 = fadd float %222, %119 %224 = fmul float %221, %214 %225 = fadd float %224, %120 %226 = fmul float %221, %215 %227 = fadd float %226, %121 %228 = fsub float 1.000000e+00, %24 %229 = fsub float 1.000000e+00, %25 %230 = fsub float 1.000000e+00, %26 %231 = fmul float %223, %228 %232 = fmul float %225, %229 %233 = fmul float %227, %230 %234 = fmul float %231, %30 %235 = fadd float %234, %27 %236 = fmul float %232, %30 %237 = fadd float %236, %28 %238 = fmul float %233, %30 %239 = fadd float %238, %29 %240 = bitcast float %101 to i32 %241 = bitcast float %102 to i32 %242 = insertelement <2 x i32> undef, i32 %240, i32 0 %243 = insertelement <2 x i32> %242, i32 %241, i32 1 %244 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %243, <32 x i8> %55, <16 x i8> %57, i32 2) %245 = extractelement <4 x float> %244, i32 0 %246 = extractelement <4 x float> %244, i32 1 %247 = extractelement <4 x float> %244, i32 2 %248 = fmul float %245, %35 %249 = fmul float %246, %36 %250 = fmul float %247, %37 %251 = fmul float %235, %248 %252 = fmul float %237, %249 %253 = fmul float %239, %250 %254 = fmul float %167, 0x3FEA20BD80000000 %255 = fmul float %169, 0x3FE279A740000000 %256 = fadd float %254, %255 %257 = fadd float %256, 0.000000e+00 %258 = call float @llvm.AMDIL.clamp.(float %257, float 0.000000e+00, float 1.000000e+00) %259 = fmul float %165, 0xBFE6A09E60000000 %260 = fmul float %167, 0xBFDA20BD80000000 %261 = fadd float %260, %259 %262 = fmul float %169, 0x3FE279A740000000 %263 = fadd float %261, %262 %264 = call float @llvm.AMDIL.clamp.(float %263, float 0.000000e+00, float 1.000000e+00) %265 = fmul float %167, 0xBFDA20BD80000000 %266 = fmul float %169, 0x3FE279A740000000 %267 = fadd float %266, %265 %268 = fmul float %165, 0x3FE6A09E60000000 %269 = fadd float %267, %268 %270 = call float @llvm.AMDIL.clamp.(float %269, float 0.000000e+00, float 1.000000e+00) %271 = call float @llvm.maxnum.f32(float %258, float 0x3EB0C6F7A0000000) %272 = call float @llvm.maxnum.f32(float %264, float 0x3EB0C6F7A0000000) %273 = call float @llvm.maxnum.f32(float %270, float 0x3EB0C6F7A0000000) %274 = fmul float %271, %271 %275 = fmul float %272, %272 %276 = fmul float %273, %273 %277 = fmul float %274, %274 %278 = fmul float %275, %275 %279 = fmul float %276, %276 %280 = fmul float %277, %277 %281 = fmul float %278, %278 %282 = fmul float %279, %279 %283 = fmul float %280, %280 %284 = fmul float %281, %281 %285 = fmul float %282, %282 %286 = bitcast float %101 to i32 %287 = bitcast float %102 to i32 %288 = insertelement <2 x i32> undef, i32 %286, i32 0 %289 = insertelement <2 x i32> %288, i32 %287, i32 1 %290 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %289, <32 x i8> %60, <16 x i8> %63, i32 2) %291 = extractelement <4 x float> %290, i32 0 %292 = extractelement <4 x float> %290, i32 1 %293 = extractelement <4 x float> %290, i32 2 %294 = fmul float %291, %38 %295 = fmul float %292, %39 %296 = fmul float %293, %40 %297 = fmul float %294, %283 %298 = fmul float %295, %284 %299 = fadd float %298, %297 %300 = fmul float %296, %285 %301 = fadd float %299, %300 %302 = fmul float %294, 0x3FD5555560000000 %303 = fmul float %295, 0x3FD5555560000000 %304 = fadd float %303, %302 %305 = fmul float %296, 0x3FD5555560000000 %306 = fadd float %304, %305 %307 = bitcast float %103 to i32 %308 = bitcast float %104 to i32 %309 = insertelement <2 x i32> undef, i32 %307, i32 0 %310 = insertelement <2 x i32> %309, i32 %308, i32 1 %311 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %310, <32 x i8> %84, <16 x i8> %87, i32 2) %312 = extractelement <4 x float> %311, i32 0 %313 = extractelement <4 x float> %311, i32 1 %314 = extractelement <4 x float> %311, i32 2 %315 = fmul float %312, %34 %316 = fadd float %315, %31 %317 = fmul float %313, %34 %318 = fadd float %317, %32 %319 = fmul float %314, %34 %320 = fadd float %319, %33 %321 = fmul float %248, %316 %322 = fmul float %249, %318 %323 = fmul float %250, %320 %324 = fmul float %301, %321 %325 = fmul float %301, %322 %326 = fmul float %301, %323 %327 = fmul float %251, %306 %328 = fadd float %327, %324 %329 = fmul float %252, %306 %330 = fadd float %329, %325 %331 = fmul float %253, %306 %332 = fadd float %331, %326 %333 = fadd float %328, %24 %334 = fadd float %330, %25 %335 = fadd float %332, %26 %336 = fmul float %235, %51 %337 = fadd float %336, %333 %338 = fmul float %237, %52 %339 = fadd float %338, %334 %340 = fmul float %239, %53 %341 = fadd float %340, %335 %342 = bitcast float %103 to i32 %343 = bitcast float %104 to i32 %344 = insertelement <2 x i32> undef, i32 %342, i32 0 %345 = insertelement <2 x i32> %344, i32 %343, i32 1 %346 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %345, <32 x i8> %90, <16 x i8> %93, i32 2) %347 = extractelement <4 x float> %346, i32 0 %348 = call float @llvm.maxnum.f32(float %347, float %50) %349 = call float @llvm.minnum.f32(float %49, float %348) %350 = call float @llvm.AMDIL.clamp.(float %349, float 0.000000e+00, float 1.000000e+00) %351 = call i32 @llvm.SI.packf16(float %337, float %339) %352 = bitcast i32 %351 to float %353 = call i32 @llvm.SI.packf16(float %341, float %350) %354 = bitcast i32 %353 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %352, float %354, float %352, float %354) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 0, 4, [m0] ; C8341000 v_interp_p2_f32 v13, [v13], v1, 0, 4, [m0] ; C8351001 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_mul_f32_e32 v15, v5, v5 ; 101E0B05 v_mad_f32 v15, v6, v6, v15 ; D282000F 043E0D06 v_mad_f32 v15, v7, v7, v15 ; D282000F 043E0F07 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v0, v0, 2, 4, [m0] ; C8001200 v_interp_p2_f32 v0, [v0], v1, 2, 4, [m0] ; C8011201 v_min_f32_e32 v1, 0x7f7fffff, v15 ; 1E021EFF 7F7FFFFF v_mul_f32_e32 v15, v2, v2 ; 101E0502 v_mad_f32 v15, v3, v3, v15 ; D282000F 043E0703 v_mad_f32 v15, v4, v4, v15 ; D282000F 043E0904 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mul_f32_e32 v6, v1, v6 ; 100C0D01 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_min_f32_e32 v7, 0x7f7fffff, v15 ; 1E0E1EFF 7F7FFFFF v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v7, v3, v1 ; 100E0303 v_mad_f32 v7, v6, v4, -v7 ; D2820007 841E0906 v_mul_f32_e32 v15, v4, v5 ; 101E0B04 v_mad_f32 v15, v1, v2, -v15 ; D282000F 843E0501 v_mul_f32_e32 v16, v2, v6 ; 10200D02 v_mad_f32 v16, v5, v3, -v16 ; D2820010 84420705 v_mul_f32_e32 v17, v13, v13 ; 10221B0D v_mad_f32 v17, v14, v14, v17 ; D2820011 04461D0E v_mad_f32 v17, v0, v0, v17 ; D2820011 04460100 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_f32_e32 v15, v8, v15 ; 101E1F08 v_mul_f32_e32 v8, v8, v16 ; 10102108 v_min_f32_e32 v16, 0x7f7fffff, v17 ; 1E2022FF 7F7FFFFF v_mul_f32_e32 v17, v16, v0 ; 10220110 v_mad_f32 v18, v0, v16, v17 ; D2820012 04462100 v_mad_f32 v0, -v0, v16, v18 ; D2820000 244A2100 v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_mul_f32_e32 v14, v16, v14 ; 101C1D10 v_mad_f32 v13, 0, v17, -v13 ; D282000D 84362280 v_mad_f32 v14, 0, v17, -v14 ; D282000E 843A2280 v_mul_f32_e32 v7, v13, v7 ; 100E0F0D v_mad_f32 v7, v15, v14, v7 ; D2820007 041E1D0F v_mov_b32_e32 v18, 0 ; 7E240280 v_mad_f32 v16, v8, v0, v7 ; D2820010 041E0108 v_mul_f32_e32 v2, v13, v2 ; 1004050D v_mad_f32 v2, v3, v14, v2 ; D2820002 040A1D03 v_mad_f32 v15, v4, v0, v2 ; D282000F 040A0104 s_load_dwordx4 s[12:15], s[4:5], 0x10 ; C0860510 s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720 v_mul_f32_e32 v2, v13, v5 ; 10040B0D v_mad_f32 v2, v6, v14, v2 ; D2820002 040A1D06 v_mad_f32 v17, v1, v0, v2 ; D2820011 040A0101 v_cubeid_f32 v4, v15, v16, v17 ; D2880004 0446210F v_cubema_f32 v3, v15, v16, v17 ; D28E0003 0446210F v_cubesc_f32 v2, v15, v16, v17 ; D28A0002 0446210F v_cubetc_f32 v1, v15, v16, v17 ; D28C0001 0446210F v_rcp_f32_e64 v5, |v3| ; D3540105 00000103 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800700 0064060B v_mov_b32_e32 v15, 0x3fc00000 ; 7E1E02FF 3FC00000 v_mad_f32 v3, v1, v5, v15 ; D2820003 043E0B01 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 s_load_dwordx4 s[36:39], s[4:5], 0xc ; C092050C s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718 s_load_dwordx8 s[48:55], s[6:7], 0x0 ; C0D80700 v_mad_f32 v2, v2, v5, v15 ; D2820002 043E0B02 s_buffer_load_dword s56, s[0:3], 0x27 ; C21C0127 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_buffer_load_dword s57, s[0:3], 0x24 ; C21C8124 s_buffer_load_dword s58, s[0:3], 0x25 ; C21D0125 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[28:35], s[24:27] ; F0800700 00C70102 s_buffer_load_dword s24, s[0:3], 0x26 ; C20C0126 image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[40:47], s[36:39] ; F0800100 012A040B image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[48:55], s[8:11] ; F0800700 004C0F09 s_buffer_load_dword s8, s[0:3], 0x23 ; C2040123 s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120 s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121 image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[16:23], s[12:15] ; F0800700 00641209 v_mov_b32_e32 v5, s56 ; 7E0A0238 v_mul_f32_e32 v5, s57, v5 ; 100A0A39 v_mov_b32_e32 v9, s56 ; 7E120238 v_mul_f32_e32 v9, s58, v9 ; 1012123A v_mov_b32_e32 v10, s56 ; 7E140238 s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 v_mul_f32_e32 v10, s24, v10 ; 10141418 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v6, v7, v9 ; 100C1307 v_mul_f32_e32 v7, v8, v10 ; 100E1508 s_waitcnt vmcnt(1) ; BF8C0771 v_mov_b32_e32 v8, s8 ; 7E100208 v_mul_f32_e32 v8, s9, v8 ; 10101009 v_mov_b32_e32 v9, s8 ; 7E120208 v_mul_f32_e32 v9, s10, v9 ; 1012120A v_mov_b32_e32 v10, s8 ; 7E140208 s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 s_load_dwordx4 s[12:15], s[4:5], 0x14 ; C0860514 s_load_dwordx8 s[16:23], s[6:7], 0x28 ; C0C80728 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, s11, v10 ; 1014140B v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v2, v9, v2 ; 10040509 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 s_buffer_load_dword s10, s[0:3], 0x15 ; C2050115 s_buffer_load_dword s11, s[0:3], 0x16 ; C2058116 s_load_dwordx4 s[24:27], s[4:5], 0x18 ; C08C0518 s_load_dwordx8 s[28:35], s[6:7], 0x30 ; C0CE0730 v_mul_f32_e32 v3, v10, v3 ; 1006070A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[8:10], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800700 0064080B v_mov_b32_e32 v21, s8 ; 7E2A0208 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_buffer_load_dword s5, s[0:3], 0x19 ; C2028119 s_buffer_load_dword s6, s[0:3], 0x1a ; C203011A s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, s9, v8, v21 ; D2820008 04561009 v_mov_b32_e32 v21, s10 ; 7E2A020A v_mad_f32 v9, s9, v9, v21 ; D2820009 04561209 v_mov_b32_e32 v21, s11 ; 7E2A020B v_mad_f32 v10, s9, v10, v21 ; D282000A 04561409 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[28:35], s[24:27] ; F0800100 00C70B0B s_buffer_load_dword s8, s[0:3], 0x12 ; C2040112 s_buffer_load_dword s9, s[0:3], 0x13 ; C2048113 s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D s_buffer_load_dword s11, s[0:3], 0x1e ; C205811E s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 s_buffer_load_dword s14, s[0:3], 0x2 ; C2070102 s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s17, s[0:3], 0x28 ; C2088128 s_buffer_load_dword s18, s[0:3], 0x29 ; C2090129 s_buffer_load_dword s19, s[0:3], 0x2c ; C209812C s_buffer_load_dword s20, s[0:3], 0x2d ; C20A012D s_buffer_load_dword s0, s[0:3], 0x2e ; C200012E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s4, v15 ; 10181E04 v_mul_f32_e32 v15, s5, v16 ; 101E2005 v_mul_f32_e32 v16, s6, v17 ; 10202206 v_mul_f32_e32 v17, s7, v18 ; 10222407 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, s10, v19 ; 1024260A v_mul_f32_e32 v19, s11, v20 ; 1026280B v_mad_f32 v1, v4, v1, v5 ; D2820001 04160304 v_mad_f32 v2, v4, v2, v6 ; D2820002 041A0504 v_mad_f32 v3, v4, v3, v7 ; D2820003 041E0704 v_sub_f32_e64 v4, 1.0, s12 ; D2080004 000018F2 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_sub_f32_e64 v4, 1.0, s13 ; D2080004 00001AF2 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_sub_f32_e64 v4, 1.0, s14 ; D2080004 00001CF2 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mov_b32_e32 v4, s15 ; 7E08020F v_mad_f32 v1, s9, v1, v4 ; D2820001 04120209 v_mov_b32_e32 v4, s16 ; 7E080210 v_mad_f32 v2, s9, v2, v4 ; D2820002 04120409 v_mov_b32_e32 v4, s8 ; 7E080208 v_mad_f32 v3, s9, v3, v4 ; D2820003 04120609 v_mov_b32_e32 v4, 0xbf3504f3 ; 7E0802FF BF3504F3 v_mul_f32_e32 v4, v13, v4 ; 1008090D v_mov_b32_e32 v5, 0xbed105ec ; 7E0A02FF BED105EC v_mad_f32 v4, v14, v5, v4 ; D2820004 04120B0E v_mul_f32_e32 v5, v5, v14 ; 100A1D05 v_mov_b32_e32 v6, 0x3f13cd3a ; 7E0C02FF 3F13CD3A v_mad_f32 v4, v0, v6, v4 ; D2820004 04120D00 v_mad_f32 v5, v0, v6, v5 ; D2820005 04160D00 v_mad_f32 v0, v0, v6, 0 ; D2820000 02020D00 v_madmk_f32_e32 v0, v14, v0, 0x3f5105ec ; 4000010E 3F5105EC v_madmk_f32_e32 v5, v13, v5, 0x3f3504f3 ; 400A0B0D 3F3504F3 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_max_f32_e32 v4, 0x358637bd, v4 ; 200808FF 358637BD v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v0, v0, v17 ; 10002300 v_mad_f32 v0, v18, v4, v0 ; D2820000 04020912 v_add_f32_e64 v4, 0, v5 clamp ; D2060804 00020A80 v_max_f32_e32 v4, 0x358637bd, v4 ; 200808FF 358637BD v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mad_f32 v0, v19, v4, v0 ; D2820000 04020913 v_mul_f32_e32 v4, 0x3eaaaaab, v17 ; 100822FF 3EAAAAAB v_mov_b32_e32 v5, 0x3eaaaaab ; 7E0A02FF 3EAAAAAB v_mad_f32 v4, v18, v5, v4 ; D2820004 04120B12 v_mad_f32 v4, v19, v5, v4 ; D2820004 04120B13 v_mul_f32_e32 v5, v8, v12 ; 100A1908 v_mad_f32 v5, v0, v5, s12 ; D2820005 00320B00 v_mul_f32_e32 v6, v12, v1 ; 100C030C v_mad_f32 v5, v6, v4, v5 ; D2820005 04160906 v_mul_f32_e32 v6, v9, v15 ; 100C1F09 v_mad_f32 v6, v0, v6, s13 ; D2820006 00360D00 v_mul_f32_e32 v7, v15, v2 ; 100E050F v_mad_f32 v6, v7, v4, v6 ; D2820006 041A0907 v_mul_f32_e32 v7, v10, v16 ; 100E210A v_mad_f32 v0, v0, v7, s14 ; D2820000 003A0F00 v_mul_f32_e32 v7, v16, v3 ; 100E0710 v_mad_f32 v0, v7, v4, v0 ; D2820000 04020907 v_mad_f32 v1, v1, s19, v5 ; D2820001 04142701 v_mad_f32 v2, v2, s20, v6 ; D2820002 04182902 v_mad_f32 v0, v3, s0, v0 ; D2820000 04000103 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_max_f32_e32 v2, s18, v11 ; 20041612 v_min_f32_e32 v2, s17, v2 ; 1E040411 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 24 Code Size: 1320 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[1], PERSPECTIVE DCL IN[4], TEXCOORD[4], PERSPECTIVE DCL IN[5], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..10] DCL TEMP[0], LOCAL DCL TEMP[1..4] IMM[0] FLT32 { 0.0000, 2.0000, -0.0000, 1.0000} IMM[1] FLT32 { 15.0000, 0.9151, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[1], IN[1] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[1], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[0], IN[0] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[0], TEMP[0].xxxx 8: MUL TEMP[3].xyz, TEMP[1].zxyw, TEMP[2].yzxw 9: MAD TEMP[3].xyz, TEMP[1].yzxw, TEMP[2].zxyw, -TEMP[3] 10: MUL TEMP[3].xyz, TEMP[3], IN[1].wwww 11: DP3 TEMP[0].x, IN[5], IN[5] 12: RSQ TEMP[0].x, TEMP[0].xxxx 13: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 14: MUL TEMP[4].xyz, IN[5], TEMP[0].xxxx 15: MAD TEMP[4].xyz, TEMP[4].zzzz, IMM[0].xxyw, -TEMP[4] 16: DP3 TEMP[3].y, TEMP[3], TEMP[4] 17: DP3 TEMP[3].x, TEMP[2], TEMP[4] 18: DP3 TEMP[3].z, TEMP[1], TEMP[4] 19: TEX TEMP[1], TEMP[3], SAMP[0], CUBE 20: MUL TEMP[2].xyz, CONST[6], CONST[6].wwww 21: MUL TEMP[1].xyz, TEMP[1], TEMP[2] 22: MUL TEMP[2].xyz, CONST[7], CONST[7].wwww 23: TEX TEMP[3], IN[3], SAMP[2], 2D 24: MUL TEMP[2].xyz, TEMP[2], TEMP[3] 25: TEX TEMP[3], IN[3], SAMP[1], 2D 26: MAD TEMP[1].xyz, TEMP[3].xxxx, TEMP[1], TEMP[2] 27: MOV TEMP[1].w, IMM[0].wwww 28: ADD TEMP[2].xyz, TEMP[1].wwww, -CONST[0] 29: MUL TEMP[1].xyz, TEMP[1], TEMP[2] 30: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 31: DP3 TEMP[0].x, IN[4], IN[4] 32: RSQ TEMP[0].x, TEMP[0].xxxx 33: MIN TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 34: MUL TEMP[2].xyz, IN[4], TEMP[0].xxxx 35: MOV_SAT TEMP[1].w, TEMP[2].zzzz 36: DP3_SAT TEMP[2].x, TEMP[4], TEMP[2] 37: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 38: ADD TEMP[1].w, TEMP[1].wwww, IMM[0].zzzz 39: CMP TEMP[1].xyz, TEMP[1].wwww, IMM[0].xxxx, TEMP[1] 40: POW TEMP[1].w, |TEMP[2].xxxx|, IMM[1].xxxx 41: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz 42: MUL TEMP[1].w, TEMP[1].wwww, IMM[1].yyyy 43: TEX TEMP[3], IN[3], SAMP[3], 2D 44: MAD TEMP[2].yzw, TEMP[3].xxyz, CONST[5].wwww, CONST[5].xxyz 45: MUL TEMP[2].yzw, TEMP[1].wwww, TEMP[2] 46: CMP TEMP[2].xyz, TEMP[2].xxxx, IMM[0].xxxx, TEMP[2].yzww 47: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 48: TEX TEMP[2], IN[2], SAMP[5], 2D 49: ADD TEMP[1].w, TEMP[2].xxxx, CONST[10].xxxx 50: MUL_SAT TEMP[1].w, TEMP[1].wwww, CONST[10].yyyy 51: ADD TEMP[2].x, TEMP[1].wwww, IMM[0].zzzz 52: POW TEMP[2].y, |TEMP[1].wwww|, CONST[10].zzzz 53: CMP TEMP[1].w, TEMP[2].xxxx, IMM[0].xxxx, TEMP[2].yyyy 54: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 55: MUL OUT[0].xyz, TEMP[1], CONST[9] 56: TEX TEMP[1], IN[3], SAMP[4], 2D 57: MAX TEMP[2].x, TEMP[1].xxxx, CONST[8].yyyy 58: MIN_SAT OUT[0].w, CONST[8].xxxx, TEMP[2].xxxx 59: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %51 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0 %53 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %74 = bitcast <8 x i32> addrspace(2)* %73 to <32 x i8> addrspace(2)* %75 = load <32 x i8>, <32 x i8> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %77 = bitcast <4 x i32> addrspace(2)* %76 to <16 x i8> addrspace(2)* %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %80 = bitcast <8 x i32> addrspace(2)* %79 to <32 x i8> addrspace(2)* %81 = load <32 x i8>, <32 x i8> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %83 = bitcast <4 x i32> addrspace(2)* %82 to <16 x i8> addrspace(2)* %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %102 = fmul float %88, %88 %103 = fmul float %89, %89 %104 = fadd float %103, %102 %105 = fmul float %90, %90 %106 = fadd float %104, %105 %107 = call float @llvm.AMDGPU.rsq.clamped.f32(float %106) %108 = call float @llvm.minnum.f32(float %107, float 0x47EFFFFFE0000000) %109 = fmul float %88, %108 %110 = fmul float %89, %108 %111 = fmul float %90, %108 %112 = fmul float %85, %85 %113 = fmul float %86, %86 %114 = fadd float %113, %112 %115 = fmul float %87, %87 %116 = fadd float %114, %115 %117 = call float @llvm.AMDGPU.rsq.clamped.f32(float %116) %118 = call float @llvm.minnum.f32(float %117, float 0x47EFFFFFE0000000) %119 = fmul float %85, %118 %120 = fmul float %86, %118 %121 = fmul float %87, %118 %122 = fmul float %111, %120 %123 = fmul float %109, %121 %124 = fmul float %110, %119 %125 = fmul float %110, %121 %126 = fsub float %125, %122 %127 = fmul float %111, %119 %128 = fsub float %127, %123 %129 = fmul float %109, %120 %130 = fsub float %129, %124 %131 = fmul float %126, %91 %132 = fmul float %128, %91 %133 = fmul float %130, %91 %134 = fmul float %99, %99 %135 = fmul float %100, %100 %136 = fadd float %135, %134 %137 = fmul float %101, %101 %138 = fadd float %136, %137 %139 = call float @llvm.AMDGPU.rsq.clamped.f32(float %138) %140 = call float @llvm.minnum.f32(float %139, float 0x47EFFFFFE0000000) %141 = fmul float %99, %140 %142 = fmul float %100, %140 %143 = fmul float %101, %140 %144 = fmul float %143, 0.000000e+00 %145 = fsub float %144, %141 %146 = fmul float %143, 0.000000e+00 %147 = fsub float %146, %142 %148 = fmul float %143, 2.000000e+00 %149 = fsub float %148, %143 %150 = fmul float %131, %145 %151 = fmul float %132, %147 %152 = fadd float %151, %150 %153 = fmul float %133, %149 %154 = fadd float %152, %153 %155 = fmul float %119, %145 %156 = fmul float %120, %147 %157 = fadd float %156, %155 %158 = fmul float %121, %149 %159 = fadd float %157, %158 %160 = fmul float %109, %145 %161 = fmul float %110, %147 %162 = fadd float %161, %160 %163 = fmul float %111, %149 %164 = fadd float %162, %163 %165 = insertelement <4 x float> undef, float %159, i32 0 %166 = insertelement <4 x float> %165, float %154, i32 1 %167 = insertelement <4 x float> %166, float %164, i32 2 %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 3 %169 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %168) %170 = extractelement <4 x float> %169, i32 0 %171 = extractelement <4 x float> %169, i32 1 %172 = extractelement <4 x float> %169, i32 2 %173 = extractelement <4 x float> %169, i32 3 %174 = call float @fabs(float %172) %175 = fdiv float 1.000000e+00, %174 %176 = fmul float %170, %175 %177 = fadd float %176, 1.500000e+00 %178 = fmul float %171, %175 %179 = fadd float %178, 1.500000e+00 %180 = bitcast float %179 to i32 %181 = bitcast float %177 to i32 %182 = bitcast float %173 to i32 %183 = insertelement <4 x i32> undef, i32 %180, i32 0 %184 = insertelement <4 x i32> %183, i32 %181, i32 1 %185 = insertelement <4 x i32> %184, i32 %182, i32 2 %186 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %185, <32 x i8> %52, <16 x i8> %54, i32 4) %187 = extractelement <4 x float> %186, i32 0 %188 = extractelement <4 x float> %186, i32 1 %189 = extractelement <4 x float> %186, i32 2 %190 = fmul float %35, %38 %191 = fmul float %36, %38 %192 = fmul float %37, %38 %193 = fmul float %187, %190 %194 = fmul float %188, %191 %195 = fmul float %189, %192 %196 = fmul float %39, %42 %197 = fmul float %40, %42 %198 = fmul float %41, %42 %199 = bitcast float %94 to i32 %200 = bitcast float %95 to i32 %201 = insertelement <2 x i32> undef, i32 %199, i32 0 %202 = insertelement <2 x i32> %201, i32 %200, i32 1 %203 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %202, <32 x i8> %63, <16 x i8> %66, i32 2) %204 = extractelement <4 x float> %203, i32 0 %205 = extractelement <4 x float> %203, i32 1 %206 = extractelement <4 x float> %203, i32 2 %207 = fmul float %196, %204 %208 = fmul float %197, %205 %209 = fmul float %198, %206 %210 = bitcast float %94 to i32 %211 = bitcast float %95 to i32 %212 = insertelement <2 x i32> undef, i32 %210, i32 0 %213 = insertelement <2 x i32> %212, i32 %211, i32 1 %214 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %213, <32 x i8> %57, <16 x i8> %60, i32 2) %215 = extractelement <4 x float> %214, i32 0 %216 = fmul float %215, %193 %217 = fadd float %216, %207 %218 = fmul float %215, %194 %219 = fadd float %218, %208 %220 = fmul float %215, %195 %221 = fadd float %220, %209 %222 = fsub float 1.000000e+00, %24 %223 = fsub float 1.000000e+00, %25 %224 = fsub float 1.000000e+00, %26 %225 = fmul float %217, %222 %226 = fmul float %219, %223 %227 = fmul float %221, %224 %228 = fmul float %225, %30 %229 = fadd float %228, %27 %230 = fmul float %226, %30 %231 = fadd float %230, %28 %232 = fmul float %227, %30 %233 = fadd float %232, %29 %234 = fmul float %96, %96 %235 = fmul float %97, %97 %236 = fadd float %235, %234 %237 = fmul float %98, %98 %238 = fadd float %236, %237 %239 = call float @llvm.AMDGPU.rsq.clamped.f32(float %238) %240 = call float @llvm.minnum.f32(float %239, float 0x47EFFFFFE0000000) %241 = fmul float %96, %240 %242 = fmul float %97, %240 %243 = fmul float %98, %240 %244 = call float @llvm.AMDIL.clamp.(float %243, float 0.000000e+00, float 1.000000e+00) %245 = fmul float %145, %241 %246 = fmul float %147, %242 %247 = fadd float %246, %245 %248 = fmul float %149, %243 %249 = fadd float %247, %248 %250 = call float @llvm.AMDIL.clamp.(float %249, float 0.000000e+00, float 1.000000e+00) %251 = fmul float %244, %229 %252 = fmul float %244, %231 %253 = fmul float %244, %233 %254 = fadd float %244, 0xBEB0C6F7A0000000 %255 = call float @llvm.AMDGPU.cndlt(float %254, float 0.000000e+00, float %251) %256 = call float @llvm.AMDGPU.cndlt(float %254, float 0.000000e+00, float %252) %257 = call float @llvm.AMDGPU.cndlt(float %254, float 0.000000e+00, float %253) %258 = call float @fabs(float %250) %259 = call float @llvm.pow.f32(float %258, float 1.500000e+01) %260 = fadd float %250, 0xBEB0C6F7A0000000 %261 = fmul float %259, 0x3FED48D5A0000000 %262 = bitcast float %94 to i32 %263 = bitcast float %95 to i32 %264 = insertelement <2 x i32> undef, i32 %262, i32 0 %265 = insertelement <2 x i32> %264, i32 %263, i32 1 %266 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %265, <32 x i8> %69, <16 x i8> %72, i32 2) %267 = extractelement <4 x float> %266, i32 0 %268 = extractelement <4 x float> %266, i32 1 %269 = extractelement <4 x float> %266, i32 2 %270 = fmul float %267, %34 %271 = fadd float %270, %31 %272 = fmul float %268, %34 %273 = fadd float %272, %32 %274 = fmul float %269, %34 %275 = fadd float %274, %33 %276 = fmul float %261, %271 %277 = fmul float %261, %273 %278 = fmul float %261, %275 %279 = call float @llvm.AMDGPU.cndlt(float %260, float 0.000000e+00, float %276) %280 = call float @llvm.AMDGPU.cndlt(float %260, float 0.000000e+00, float %277) %281 = call float @llvm.AMDGPU.cndlt(float %260, float 0.000000e+00, float %278) %282 = fadd float %255, %279 %283 = fadd float %256, %280 %284 = fadd float %257, %281 %285 = bitcast float %92 to i32 %286 = bitcast float %93 to i32 %287 = insertelement <2 x i32> undef, i32 %285, i32 0 %288 = insertelement <2 x i32> %287, i32 %286, i32 1 %289 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %288, <32 x i8> %81, <16 x i8> %84, i32 2) %290 = extractelement <4 x float> %289, i32 0 %291 = fadd float %290, %48 %292 = fmul float %291, %49 %293 = call float @llvm.AMDIL.clamp.(float %292, float 0.000000e+00, float 1.000000e+00) %294 = fadd float %293, 0xBEB0C6F7A0000000 %295 = call float @fabs(float %293) %296 = call float @llvm.pow.f32(float %295, float %50) %297 = call float @llvm.AMDGPU.cndlt(float %294, float 0.000000e+00, float %296) %298 = fmul float %297, %282 %299 = fmul float %297, %283 %300 = fmul float %297, %284 %301 = fmul float %298, %45 %302 = fmul float %299, %46 %303 = fmul float %300, %47 %304 = bitcast float %94 to i32 %305 = bitcast float %95 to i32 %306 = insertelement <2 x i32> undef, i32 %304, i32 0 %307 = insertelement <2 x i32> %306, i32 %305, i32 1 %308 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %307, <32 x i8> %75, <16 x i8> %78, i32 2) %309 = extractelement <4 x float> %308, i32 0 %310 = call float @llvm.maxnum.f32(float %309, float %44) %311 = call float @llvm.minnum.f32(float %43, float %310) %312 = call float @llvm.AMDIL.clamp.(float %311, float 0.000000e+00, float 1.000000e+00) %313 = call i32 @llvm.SI.packf16(float %301, float %302) %314 = bitcast i32 %313 to float %315 = call i32 @llvm.SI.packf16(float %303, float %312) %316 = bitcast i32 %315 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %314, float %316, float %314, float %316) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 0, 4, [m0] ; C8341000 v_interp_p2_f32 v13, [v13], v1, 0, 4, [m0] ; C8351001 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v15, v0, 2, 4, [m0] ; C83C1200 v_interp_p2_f32 v15, [v15], v1, 2, 4, [m0] ; C83D1201 v_interp_p1_f32 v16, v0, 0, 5, [m0] ; C8401400 v_interp_p2_f32 v16, [v16], v1, 0, 5, [m0] ; C8411401 v_interp_p1_f32 v17, v0, 1, 5, [m0] ; C8441500 v_mul_f32_e32 v18, v5, v5 ; 10240B05 v_mad_f32 v18, v6, v6, v18 ; D2820012 044A0D06 v_mad_f32 v18, v7, v7, v18 ; D2820012 044A0F07 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_interp_p2_f32 v17, [v17], v1, 1, 5, [m0] ; C8451501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 v_min_f32_e32 v1, 0x7f7fffff, v18 ; 1E0224FF 7F7FFFFF v_mul_f32_e32 v18, v2, v2 ; 10240502 v_mad_f32 v18, v3, v3, v18 ; D2820012 044A0703 v_mad_f32 v18, v4, v4, v18 ; D2820012 044A0904 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mul_f32_e32 v6, v1, v6 ; 100C0D01 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_min_f32_e32 v7, 0x7f7fffff, v18 ; 1E0E24FF 7F7FFFFF v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v7, v3, v1 ; 100E0303 v_mad_f32 v7, v6, v4, -v7 ; D2820007 841E0906 v_mul_f32_e32 v18, v4, v5 ; 10240B04 v_mad_f32 v18, v1, v2, -v18 ; D2820012 844A0501 v_mul_f32_e32 v19, v2, v6 ; 10260D02 v_mad_f32 v19, v5, v3, -v19 ; D2820013 844E0705 v_mul_f32_e32 v20, v16, v16 ; 10282110 v_mad_f32 v20, v17, v17, v20 ; D2820014 04522311 v_mad_f32 v20, v0, v0, v20 ; D2820014 04520100 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_f32_e32 v18, v8, v18 ; 10242508 v_mul_f32_e32 v8, v8, v19 ; 10102708 v_min_f32_e32 v19, 0x7f7fffff, v20 ; 1E2628FF 7F7FFFFF v_mul_f32_e32 v20, v19, v0 ; 10280113 v_mad_f32 v21, v0, v19, v20 ; D2820015 04522700 v_mad_f32 v0, -v0, v19, v21 ; D2820000 24562700 v_mul_f32_e32 v16, v19, v16 ; 10202113 v_mul_f32_e32 v17, v19, v17 ; 10222313 v_mad_f32 v16, 0, v20, -v16 ; D2820010 84422880 v_mad_f32 v17, 0, v20, -v17 ; D2820011 84462880 v_mul_f32_e32 v7, v16, v7 ; 100E0F10 v_mad_f32 v7, v18, v17, v7 ; D2820007 041E2312 v_mov_b32_e32 v21, 0 ; 7E2A0280 v_mad_f32 v19, v8, v0, v7 ; D2820013 041E0108 v_mul_f32_e32 v2, v16, v2 ; 10040510 v_mad_f32 v2, v3, v17, v2 ; D2820002 040A2303 v_mad_f32 v18, v4, v0, v2 ; D2820012 040A0104 v_mul_f32_e32 v2, v16, v5 ; 10040B10 v_mad_f32 v2, v6, v17, v2 ; D2820002 040A2306 v_mad_f32 v20, v1, v0, v2 ; D2820014 040A0101 v_cubeid_f32 v4, v18, v19, v20 ; D2880004 04522712 v_cubema_f32 v3, v18, v19, v20 ; D28E0003 04522712 v_cubesc_f32 v2, v18, v19, v20 ; D28A0002 04522712 v_cubetc_f32 v1, v18, v19, v20 ; D28C0001 04522712 v_rcp_f32_e64 v5, |v3| ; D3540105 00000103 v_mov_b32_e32 v6, 0x3fc00000 ; 7E0C02FF 3FC00000 v_mad_f32 v3, v1, v5, v6 ; D2820003 041A0B01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s48, s[0:3], 0x14 ; C2180114 s_load_dwordx4 s[52:55], s[4:5], 0x4 ; C09A0504 s_load_dwordx4 s[56:59], s[4:5], 0x8 ; C09C0508 s_load_dwordx8 s[60:67], s[6:7], 0x0 ; C0DE0700 s_load_dwordx8 s[68:75], s[6:7], 0x10 ; C0E20710 s_load_dwordx4 s[36:39], s[4:5], 0xc ; C092050C s_load_dwordx4 s[12:15], s[4:5], 0x10 ; C0860510 s_load_dwordx4 s[24:27], s[4:5], 0x14 ; C08C0514 s_load_dwordx8 s[76:83], s[6:7], 0x8 ; C0E60708 s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718 s_load_dwordx8 s[28:35], s[6:7], 0x28 ; C0CE0728 s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720 v_mad_f32 v2, v2, v5, v6 ; D2820002 041A0B02 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[60:67], s[8:11] ; F0800700 004F0102 image_sample v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[68:75], s[56:59] ; F0800700 01D1040B s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 image_sample v7, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[76:83], s[52:55] ; F0800100 01B3070B image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[40:47], s[36:39] ; F0800700 012A120B image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[28:35], s[24:27] ; F0800100 00C70809 image_sample v9, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800100 0064090B v_mov_b32_e32 v10, s48 ; 7E140230 s_buffer_load_dword s7, s[0:3], 0x18 ; C2038118 s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s9, s[0:3], 0x1a ; C204811A s_buffer_load_dword s10, s[0:3], 0x1b ; C205011B s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mad_f32 v10, s4, v18, v10 ; D282000A 042A2404 v_mov_b32_e32 v11, s5 ; 7E160205 v_mad_f32 v11, s4, v19, v11 ; D282000B 042E2604 v_mov_b32_e32 v12, s6 ; 7E180206 v_mad_f32 v12, s4, v20, v12 ; D282000C 04322804 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s5, s[0:3], 0x13 ; C2028113 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102 s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111 s_buffer_load_dword s15, s[0:3], 0x1c ; C207811C s_buffer_load_dword s16, s[0:3], 0x1d ; C208011D s_buffer_load_dword s17, s[0:3], 0x1e ; C208811E s_buffer_load_dword s18, s[0:3], 0x1f ; C209011F s_buffer_load_dword s19, s[0:3], 0x20 ; C2098120 s_buffer_load_dword s20, s[0:3], 0x21 ; C20A0121 s_buffer_load_dword s21, s[0:3], 0x24 ; C20A8124 s_buffer_load_dword s22, s[0:3], 0x25 ; C20B0125 s_buffer_load_dword s23, s[0:3], 0x26 ; C20B8126 s_buffer_load_dword s24, s[0:3], 0x28 ; C20C0128 s_buffer_load_dword s25, s[0:3], 0x29 ; C20C8129 s_buffer_load_dword s0, s[0:3], 0x2a ; C200012A s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v18, s10 ; 7E24020A v_mul_f32_e32 v18, s7, v18 ; 10242407 v_mov_b32_e32 v19, s10 ; 7E26020A v_mul_f32_e32 v19, s8, v19 ; 10262608 v_mov_b32_e32 v20, s10 ; 7E28020A v_mul_f32_e32 v20, s9, v20 ; 10282809 v_mul_f32_e32 v1, v18, v1 ; 10020312 v_mul_f32_e32 v2, v19, v2 ; 10040513 v_mul_f32_e32 v3, v20, v3 ; 10060714 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v18, s18 ; 7E240212 v_mul_f32_e32 v18, s15, v18 ; 1024240F v_mov_b32_e32 v19, s18 ; 7E260212 v_mul_f32_e32 v19, s16, v19 ; 10262610 v_mov_b32_e32 v20, s18 ; 7E280212 v_mul_f32_e32 v20, s17, v20 ; 10282811 v_mul_f32_e32 v4, v4, v18 ; 10082504 v_mul_f32_e32 v5, v5, v19 ; 100A2705 v_mul_f32_e32 v6, v6, v20 ; 100C2906 v_mad_f32 v1, v7, v1, v4 ; D2820001 04120307 v_mad_f32 v2, v7, v2, v5 ; D2820002 04160507 v_mad_f32 v3, v7, v3, v6 ; D2820003 041A0707 v_sub_f32_e64 v4, 1.0, s6 ; D2080004 00000CF2 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_sub_f32_e64 v4, 1.0, s11 ; D2080004 000016F2 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_sub_f32_e64 v4, 1.0, s12 ; D2080004 000018F2 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mov_b32_e32 v4, s13 ; 7E08020D v_mul_f32_e32 v5, v13, v13 ; 100A1B0D v_mad_f32 v5, v14, v14, v5 ; D2820005 04161D0E v_mad_f32 v5, v15, v15, v5 ; D2820005 04161F0F v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mad_f32 v1, s5, v1, v4 ; D2820001 04120205 v_mov_b32_e32 v4, s14 ; 7E08020E v_mad_f32 v2, s5, v2, v4 ; D2820002 04120405 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v3, s5, v3, v4 ; D2820003 04120605 v_min_f32_e32 v4, 0x7f7fffff, v5 ; 1E080AFF 7F7FFFFF v_mul_f32_e32 v5, v4, v13 ; 100A1B04 v_mul_f32_e32 v5, v5, v16 ; 100A2105 v_mul_f32_e32 v6, v4, v14 ; 100C1D04 v_mad_f32 v5, v17, v6, v5 ; D2820005 04160D11 v_mul_f32_e32 v4, v4, v15 ; 10081F04 v_mad_f32 v0, v0, v4, v5 ; D2820000 04160900 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_mov_b32_e32 v5, 0xb58637bd ; 7E0A02FF B58637BD v_add_f32_e32 v4, v5, v4 ; 06080905 v_mov_b32_e32 v6, 0x7fffffff ; 7E0C02FF 7FFFFFFF v_and_b32_e32 v7, v0, v6 ; 360E0D00 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_mul_legacy_f32_e32 v4, 0x41700000, v7 ; 0E080EFF 41700000 v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v4, 0x3f6a46ad, v4 ; 100808FF 3F6A46AD v_mul_f32_e32 v7, v10, v4 ; 100E090A v_mul_f32_e32 v10, v11, v4 ; 1014090B v_mul_f32_e32 v4, v12, v4 ; 1008090C v_add_f32_e32 v0, v5, v0 ; 06000105 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v7, 0, vcc ; D2000000 01A90107 v_cndmask_b32_e64 v7, v10, 0, vcc ; D2000007 01A9010A v_cndmask_b32_e64 v4, v4, 0, vcc ; D2000004 01A90104 v_add_f32_e32 v8, s24, v8 ; 06101018 v_add_f32_e32 v0, v0, v1 ; 06000300 v_add_f32_e32 v1, v7, v2 ; 06020507 v_mul_f32_e32 v2, s25, v8 ; 10041019 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_and_b32_e32 v6, v2, v6 ; 360C0D02 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_add_f32_e32 v3, v4, v3 ; 06060704 v_add_f32_e32 v2, v5, v2 ; 06040505 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_mul_legacy_f32_e32 v2, s0, v6 ; 0E040C00 v_exp_f32_e32 v2, v2 ; 7E044B02 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v0, s21, v0 ; 10000015 v_mul_f32_e32 v1, s22, v1 ; 10020216 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_mul_f32_e32 v1, s23, v2 ; 10020417 v_max_f32_e32 v2, s20, v9 ; 20041214 v_min_f32_e32 v2, s19, v2 ; 1E040413 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 24 Code Size: 1248 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..10] DCL TEMP[0], LOCAL DCL TEMP[1..5] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.8165, 0.5774, 0.0000, 0.0039} IMM[2] FLT32 { -0.7071, -0.4082, 0.5774, 0.7071} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: TEX TEMP[2], IN[1], SAMP[2], 2D 5: MAD TEMP[2].xyz, TEMP[2], IMM[0].xxxx, IMM[0].yyyy 6: DP3 TEMP[0].x, TEMP[2], TEMP[2] 7: RSQ TEMP[0].x, TEMP[0].xxxx 8: MIN TEMP[0].x, IMM[3].xxxx, TEMP[0].xxxx 9: MUL TEMP[3].xyz, TEMP[2], TEMP[0].xxxx 10: DP3 TEMP[1].w, TEMP[3], TEMP[1] 11: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[3] 12: MAD TEMP[1].xyz, TEMP[2], IMM[0].xxxx, -TEMP[1] 13: DP2 TEMP[0].x, TEMP[1].yzzw, IMM[1] 14: ADD_SAT TEMP[2].x, IMM[1].zzzz, TEMP[0].xxxx 15: DP3_SAT TEMP[2].y, TEMP[1], IMM[2] 16: DP3_SAT TEMP[2].z, TEMP[1].yzxw, IMM[2].yzww 17: MAX TEMP[1].xyz, TEMP[2], IMM[0].wwww 18: LG2 TEMP[0].x, |TEMP[1].xxxx| 19: MAX TEMP[2].x, IMM[3].yyyy, TEMP[0].xxxx 20: LG2 TEMP[0].x, |TEMP[1].yyyy| 21: MAX TEMP[2].y, IMM[3].yyyy, TEMP[0].xxxx 22: LG2 TEMP[0].x, |TEMP[1].zzzz| 23: MAX TEMP[2].z, IMM[3].yyyy, TEMP[0].xxxx 24: MOV TEMP[1].z, IMM[0].zzzz 25: ADD TEMP[1].x, TEMP[1].zzzz, CONST[9].yyyy 26: MUL TEMP[1].xyw, TEMP[2].xyzz, TEMP[1].xxxx 27: EX2 TEMP[2].x, TEMP[1].xxxx 28: EX2 TEMP[2].y, TEMP[1].yyyy 29: EX2 TEMP[2].z, TEMP[1].wwww 30: TEX TEMP[4], IN[0], SAMP[1], 2D 31: MUL TEMP[1].xyw, TEMP[4].xyzz, CONST[7].xyzz 32: DP3 TEMP[2].x, TEMP[1].xyww, TEMP[2] 33: TEX TEMP[4], IN[1], SAMP[5], 2D 34: MAD TEMP[2].yzw, TEMP[4].xxyz, CONST[5].wwww, CONST[5].xxyz 35: TEX TEMP[4], IN[0], SAMP[0], 2D 36: MUL TEMP[4].xyz, TEMP[4], CONST[6] 37: MUL TEMP[2].yzw, TEMP[2], TEMP[4].xxyz 38: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[2].yzww 39: DP2 TEMP[0].x, TEMP[3].yzzw, IMM[1] 40: ADD_SAT TEMP[5].x, IMM[1].zzzz, TEMP[0].xxxx 41: DP3_SAT TEMP[5].y, TEMP[3], IMM[2] 42: DP3_SAT TEMP[5].z, TEMP[3].yzxw, IMM[2].yzww 43: MUL TEMP[3].xyz, TEMP[5], TEMP[5] 44: MAX TEMP[5].xyz, TEMP[3], IMM[0].wwww 45: DP3 TEMP[1].x, TEMP[1].xyww, TEMP[5] 46: ADD TEMP[1].yzw, TEMP[1].zzzz, -CONST[0].xxyz 47: TEX TEMP[3], IN[1], SAMP[4], 2D 48: MUL TEMP[1].yzw, TEMP[1], TEMP[3].xxyz 49: MAD TEMP[1].yzw, TEMP[1], CONST[4].wwww, CONST[4].xxyz 50: MUL TEMP[3].xyz, TEMP[4], TEMP[1].yzww 51: MAD TEMP[2].xyz, TEMP[3], TEMP[1].xxxx, TEMP[2] 52: MUL TEMP[3].xyz, CONST[8], CONST[8].wwww 53: TEX TEMP[4], IN[1], SAMP[3], 2D 54: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[3], CONST[0] 55: ADD TEMP[2].xyz, TEMP[2], TEMP[3] 56: MAD OUT[0].xyz, TEMP[1].yzww, CONST[10], TEMP[2] 57: MUL OUT[0].w, IMM[1].wwww, IN[2].wwww 58: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %49 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %72 = bitcast <8 x i32> addrspace(2)* %71 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %75 = bitcast <4 x i32> addrspace(2)* %74 to <16 x i8> addrspace(2)* %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %78 = bitcast <8 x i32> addrspace(2)* %77 to <32 x i8> addrspace(2)* %79 = load <32 x i8>, <32 x i8> addrspace(2)* %78, align 32, !tbaa !0 %80 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %81 = bitcast <4 x i32> addrspace(2)* %80 to <16 x i8> addrspace(2)* %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %91 = fmul float %88, %88 %92 = fmul float %89, %89 %93 = fadd float %92, %91 %94 = fmul float %90, %90 %95 = fadd float %93, %94 %96 = call float @llvm.AMDGPU.rsq.clamped.f32(float %95) %97 = call float @llvm.minnum.f32(float %96, float 0x47EFFFFFE0000000) %98 = fmul float %88, %97 %99 = fmul float %89, %97 %100 = fmul float %90, %97 %101 = bitcast float %85 to i32 %102 = bitcast float %86 to i32 %103 = insertelement <2 x i32> undef, i32 %101, i32 0 %104 = insertelement <2 x i32> %103, i32 %102, i32 1 %105 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %104, <32 x i8> %61, <16 x i8> %64, i32 2) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = extractelement <4 x float> %105, i32 2 %109 = fmul float %106, 2.000000e+00 %110 = fadd float %109, -1.000000e+00 %111 = fmul float %107, 2.000000e+00 %112 = fadd float %111, -1.000000e+00 %113 = fmul float %108, 2.000000e+00 %114 = fadd float %113, -1.000000e+00 %115 = fmul float %110, %110 %116 = fmul float %112, %112 %117 = fadd float %116, %115 %118 = fmul float %114, %114 %119 = fadd float %117, %118 %120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119) %121 = call float @llvm.minnum.f32(float %120, float 0x47EFFFFFE0000000) %122 = fmul float %110, %121 %123 = fmul float %112, %121 %124 = fmul float %114, %121 %125 = fmul float %122, %98 %126 = fmul float %123, %99 %127 = fadd float %126, %125 %128 = fmul float %124, %100 %129 = fadd float %127, %128 %130 = fmul float %129, %122 %131 = fmul float %129, %123 %132 = fmul float %129, %124 %133 = fmul float %130, 2.000000e+00 %134 = fsub float %133, %98 %135 = fmul float %131, 2.000000e+00 %136 = fsub float %135, %99 %137 = fmul float %132, 2.000000e+00 %138 = fsub float %137, %100 %139 = fmul float %136, 0x3FEA20BD80000000 %140 = fmul float %138, 0x3FE279A740000000 %141 = fadd float %139, %140 %142 = fadd float %141, 0.000000e+00 %143 = call float @llvm.AMDIL.clamp.(float %142, float 0.000000e+00, float 1.000000e+00) %144 = fmul float %134, 0xBFE6A09E60000000 %145 = fmul float %136, 0xBFDA20BD80000000 %146 = fadd float %145, %144 %147 = fmul float %138, 0x3FE279A740000000 %148 = fadd float %146, %147 %149 = call float @llvm.AMDIL.clamp.(float %148, float 0.000000e+00, float 1.000000e+00) %150 = fmul float %136, 0xBFDA20BD80000000 %151 = fmul float %138, 0x3FE279A740000000 %152 = fadd float %151, %150 %153 = fmul float %134, 0x3FE6A09E60000000 %154 = fadd float %152, %153 %155 = call float @llvm.AMDIL.clamp.(float %154, float 0.000000e+00, float 1.000000e+00) %156 = call float @llvm.maxnum.f32(float %143, float 0x3EB0C6F7A0000000) %157 = call float @llvm.maxnum.f32(float %149, float 0x3EB0C6F7A0000000) %158 = call float @llvm.maxnum.f32(float %155, float 0x3EB0C6F7A0000000) %159 = call float @fabs(float %156) %160 = call float @llvm.log2.f32(float %159) %161 = call float @llvm.maxnum.f32(float %160, float 0xC7EFFFFFE0000000) %162 = call float @fabs(float %157) %163 = call float @llvm.log2.f32(float %162) %164 = call float @llvm.maxnum.f32(float %163, float 0xC7EFFFFFE0000000) %165 = call float @fabs(float %158) %166 = call float @llvm.log2.f32(float %165) %167 = call float @llvm.maxnum.f32(float %166, float 0xC7EFFFFFE0000000) %168 = fadd float %45, 1.000000e+00 %169 = fmul float %161, %168 %170 = fmul float %164, %168 %171 = fmul float %167, %168 %172 = call float @llvm.AMDIL.exp.(float %169) %173 = call float @llvm.AMDIL.exp.(float %170) %174 = call float @llvm.AMDIL.exp.(float %171) %175 = bitcast float %83 to i32 %176 = bitcast float %84 to i32 %177 = insertelement <2 x i32> undef, i32 %175, i32 0 %178 = insertelement <2 x i32> %177, i32 %176, i32 1 %179 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %178, <32 x i8> %55, <16 x i8> %58, i32 2) %180 = extractelement <4 x float> %179, i32 0 %181 = extractelement <4 x float> %179, i32 1 %182 = extractelement <4 x float> %179, i32 2 %183 = fmul float %180, %38 %184 = fmul float %181, %39 %185 = fmul float %182, %40 %186 = fmul float %183, %172 %187 = fmul float %184, %173 %188 = fadd float %187, %186 %189 = fmul float %185, %174 %190 = fadd float %188, %189 %191 = bitcast float %85 to i32 %192 = bitcast float %86 to i32 %193 = insertelement <2 x i32> undef, i32 %191, i32 0 %194 = insertelement <2 x i32> %193, i32 %192, i32 1 %195 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %194, <32 x i8> %79, <16 x i8> %82, i32 2) %196 = extractelement <4 x float> %195, i32 0 %197 = extractelement <4 x float> %195, i32 1 %198 = extractelement <4 x float> %195, i32 2 %199 = fmul float %196, %34 %200 = fadd float %199, %31 %201 = fmul float %197, %34 %202 = fadd float %201, %32 %203 = fmul float %198, %34 %204 = fadd float %203, %33 %205 = bitcast float %83 to i32 %206 = bitcast float %84 to i32 %207 = insertelement <2 x i32> undef, i32 %205, i32 0 %208 = insertelement <2 x i32> %207, i32 %206, i32 1 %209 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %208, <32 x i8> %50, <16 x i8> %52, i32 2) %210 = extractelement <4 x float> %209, i32 0 %211 = extractelement <4 x float> %209, i32 1 %212 = extractelement <4 x float> %209, i32 2 %213 = fmul float %210, %35 %214 = fmul float %211, %36 %215 = fmul float %212, %37 %216 = fmul float %200, %213 %217 = fmul float %202, %214 %218 = fmul float %204, %215 %219 = fmul float %190, %216 %220 = fmul float %190, %217 %221 = fmul float %190, %218 %222 = fmul float %123, 0x3FEA20BD80000000 %223 = fmul float %124, 0x3FE279A740000000 %224 = fadd float %222, %223 %225 = fadd float %224, 0.000000e+00 %226 = call float @llvm.AMDIL.clamp.(float %225, float 0.000000e+00, float 1.000000e+00) %227 = fmul float %122, 0xBFE6A09E60000000 %228 = fmul float %123, 0xBFDA20BD80000000 %229 = fadd float %228, %227 %230 = fmul float %124, 0x3FE279A740000000 %231 = fadd float %229, %230 %232 = call float @llvm.AMDIL.clamp.(float %231, float 0.000000e+00, float 1.000000e+00) %233 = fmul float %123, 0xBFDA20BD80000000 %234 = fmul float %124, 0x3FE279A740000000 %235 = fadd float %234, %233 %236 = fmul float %122, 0x3FE6A09E60000000 %237 = fadd float %235, %236 %238 = call float @llvm.AMDIL.clamp.(float %237, float 0.000000e+00, float 1.000000e+00) %239 = fmul float %226, %226 %240 = fmul float %232, %232 %241 = fmul float %238, %238 %242 = call float @llvm.maxnum.f32(float %239, float 0x3EB0C6F7A0000000) %243 = call float @llvm.maxnum.f32(float %240, float 0x3EB0C6F7A0000000) %244 = call float @llvm.maxnum.f32(float %241, float 0x3EB0C6F7A0000000) %245 = fmul float %183, %242 %246 = fmul float %184, %243 %247 = fadd float %246, %245 %248 = fmul float %185, %244 %249 = fadd float %247, %248 %250 = fsub float 1.000000e+00, %24 %251 = fsub float 1.000000e+00, %25 %252 = fsub float 1.000000e+00, %26 %253 = bitcast float %85 to i32 %254 = bitcast float %86 to i32 %255 = insertelement <2 x i32> undef, i32 %253, i32 0 %256 = insertelement <2 x i32> %255, i32 %254, i32 1 %257 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %256, <32 x i8> %73, <16 x i8> %76, i32 2) %258 = extractelement <4 x float> %257, i32 0 %259 = extractelement <4 x float> %257, i32 1 %260 = extractelement <4 x float> %257, i32 2 %261 = fmul float %250, %258 %262 = fmul float %251, %259 %263 = fmul float %252, %260 %264 = fmul float %261, %30 %265 = fadd float %264, %27 %266 = fmul float %262, %30 %267 = fadd float %266, %28 %268 = fmul float %263, %30 %269 = fadd float %268, %29 %270 = fmul float %213, %265 %271 = fmul float %214, %267 %272 = fmul float %215, %269 %273 = fmul float %270, %249 %274 = fadd float %273, %219 %275 = fmul float %271, %249 %276 = fadd float %275, %220 %277 = fmul float %272, %249 %278 = fadd float %277, %221 %279 = fmul float %41, %44 %280 = fmul float %42, %44 %281 = fmul float %43, %44 %282 = bitcast float %85 to i32 %283 = bitcast float %86 to i32 %284 = insertelement <2 x i32> undef, i32 %282, i32 0 %285 = insertelement <2 x i32> %284, i32 %283, i32 1 %286 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %285, <32 x i8> %67, <16 x i8> %70, i32 2) %287 = extractelement <4 x float> %286, i32 0 %288 = fmul float %287, %279 %289 = fadd float %288, %24 %290 = fmul float %287, %280 %291 = fadd float %290, %25 %292 = fmul float %287, %281 %293 = fadd float %292, %26 %294 = fadd float %274, %289 %295 = fadd float %276, %291 %296 = fadd float %278, %293 %297 = fmul float %265, %46 %298 = fadd float %297, %294 %299 = fmul float %267, %47 %300 = fadd float %299, %295 %301 = fmul float %269, %48 %302 = fadd float %301, %296 %303 = fmul float %87, 3.906250e-03 %304 = call i32 @llvm.SI.packf16(float %298, float %300) %305 = bitcast i32 %304 to float %306 = call i32 @llvm.SI.packf16(float %302, float %303) %307 = bitcast i32 %306 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %305, float %307, float %305, float %307) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x3f13cd3a ; 7E0402FF 3F13CD3A v_mov_b32_e32 v3, 0x3f5105ec ; 7E0602FF 3F5105EC v_mov_b32_e32 v4, 0xbf3504f3 ; 7E0802FF BF3504F3 v_mov_b32_e32 v5, 0xbed105ec ; 7E0A02FF BED105EC v_mov_b32_e32 v6, 0x3f3504f3 ; 7E0C02FF 3F3504F3 v_mov_b32_e32 v7, 0xff7fffff ; 7E0E02FF FF7FFFFF v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 v_interp_p1_f32 v10, v0, 0, 1, [m0] ; C8280400 v_interp_p2_f32 v10, [v10], v1, 0, 1, [m0] ; C8290401 v_interp_p1_f32 v11, v0, 1, 1, [m0] ; C82C0500 v_interp_p2_f32 v11, [v11], v1, 1, 1, [m0] ; C82D0501 v_interp_p1_f32 v12, v0, 3, 2, [m0] ; C8300B00 v_interp_p2_f32 v12, [v12], v1, 3, 2, [m0] ; C8310B01 v_mul_f32_e32 v12, 0x3b800000, v12 ; 101818FF 3B800000 v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00 v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01 v_interp_p1_f32 v14, v0, 1, 3, [m0] ; C8380D00 v_interp_p2_f32 v14, [v14], v1, 1, 3, [m0] ; C8390D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 s_load_dwordx4 s[40:43], s[4:5], 0x0 ; C0940500 s_load_dwordx4 s[56:59], s[4:5], 0x4 ; C09C0504 s_load_dwordx4 s[60:63], s[4:5], 0x8 ; C09E0508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[28:31], s[4:5], 0x10 ; C08E0510 s_load_dwordx4 s[52:55], s[4:5], 0x14 ; C09A0514 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 s_load_dwordx8 s[64:71], s[6:7], 0x8 ; C0E00708 s_load_dwordx8 s[72:79], s[6:7], 0x10 ; C0E40710 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[8:11], 0x0 ; C2000900 s_buffer_load_dword s1, s[8:11], 0x1 ; C2008901 s_buffer_load_dword s2, s[8:11], 0x2 ; C2010902 s_buffer_load_dword s13, s[8:11], 0x10 ; C2068910 s_buffer_load_dword s12, s[8:11], 0x11 ; C2060911 s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917 s_buffer_load_dword s15, s[8:11], 0x18 ; C2078918 s_buffer_load_dword s80, s[8:11], 0x19 ; C2280919 s_buffer_load_dword s81, s[8:11], 0x1a ; C228891A s_buffer_load_dword s82, s[8:11], 0x1c ; C229091C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v1, 1.0, s0 ; D2080001 000000F2 v_sub_f32_e64 v15, 1.0, s1 ; D208000F 000002F2 v_sub_f32_e64 v16, 1.0, s2 ; D2080010 000004F2 s_buffer_load_dword s83, s[8:11], 0x1d ; C229891D s_buffer_load_dword s84, s[8:11], 0x1e ; C22A091E s_buffer_load_dword s5, s[8:11], 0x20 ; C2028920 s_buffer_load_dword s4, s[8:11], 0x21 ; C2020921 s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922 s_buffer_load_dword s85, s[8:11], 0x14 ; C22A8914 s_buffer_load_dword s86, s[8:11], 0x15 ; C22B0915 s_buffer_load_dword s87, s[8:11], 0x16 ; C22B8916 s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718 s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[72:79], s[60:63] ; F0800700 01F2110A image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[64:71], s[56:59] ; F0800700 01D01408 s_load_dwordx8 s[56:63], s[6:7], 0x28 ; C0DC0728 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v23, s85 ; 7E2E0255 v_mov_b32_e32 v24, s86 ; 7E300256 v_mov_b32_e32 v25, s87 ; 7E320257 s_waitcnt vmcnt(0) ; BF8C0770 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[56:63], s[52:55] ; F0800700 01AE1A0A image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[44:51], s[40:43] ; F0800700 014B1D08 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v8, s14, v26, v23 ; D2820008 045E340E v_mad_f32 v9, s14, v27, v24 ; D2820009 0462360E v_mad_f32 v23, s14, v28, v25 ; D2820017 0466380E image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[32:39], s[28:31] ; F0800700 00E8180A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v24, v1 ; 10020318 v_mul_f32_e32 v15, v25, v15 ; 101E1F19 v_mul_f32_e32 v16, v26, v16 ; 1020211A v_mad_f32 v17, 2.0, v17, -1.0 ; D2820011 03CE22F4 v_mad_f32 v18, 2.0, v18, -1.0 ; D2820012 03CE24F4 v_mad_f32 v19, 2.0, v19, -1.0 ; D2820013 03CE26F4 v_mul_f32_e32 v20, s82, v20 ; 10282852 v_mul_f32_e32 v21, s83, v21 ; 102A2A53 v_mul_f32_e32 v22, s84, v22 ; 102C2C54 v_mul_f32_e32 v24, s15, v29 ; 10303A0F v_mul_f32_e32 v25, s80, v30 ; 10323C50 v_mul_f32_e32 v26, s81, v31 ; 10343E51 v_mul_f32_e32 v27, v17, v17 ; 10362311 v_mad_f32 v27, v18, v18, v27 ; D282001B 046E2512 v_mad_f32 v27, v19, v19, v27 ; D282001B 046E2713 v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B v_mul_f32_e32 v28, v13, v13 ; 10381B0D v_mad_f32 v28, v14, v14, v28 ; D282001C 04721D0E v_mad_f32 v28, v0, v0, v28 ; D282001C 04720100 v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_min_f32_e32 v27, 0x7f7fffff, v27 ; 1E3636FF 7F7FFFFF v_mul_f32_e32 v17, v27, v17 ; 1022231B v_mul_f32_e32 v18, v27, v18 ; 1024251B v_mul_f32_e32 v19, v27, v19 ; 1026271B v_min_f32_e32 v27, 0x7f7fffff, v28 ; 1E3638FF 7F7FFFFF v_mul_f32_e32 v28, v27, v13 ; 10381B1B v_mul_f32_e32 v28, v28, v17 ; 1038231C v_mul_f32_e32 v29, v27, v14 ; 103A1D1B v_mad_f32 v28, v18, v29, v28 ; D282001C 04723B12 v_mul_f32_e32 v29, v27, v0 ; 103A011B v_mad_f32 v28, v19, v29, v28 ; D282001C 04723B13 v_mul_f32_e32 v29, v17, v28 ; 103A3911 v_mad_f32 v29, v28, v17, v29 ; D282001D 0476231C v_mad_f32 v13, -v13, v27, v29 ; D282000D 2476370D v_mul_f32_e32 v29, v18, v28 ; 103A3912 v_mad_f32 v29, v28, v18, v29 ; D282001D 0476251C v_mad_f32 v14, -v14, v27, v29 ; D282000E 2476370E v_mul_f32_e32 v29, v19, v28 ; 103A3913 v_mad_f32 v28, v28, v19, v29 ; D282001C 0476271C v_mad_f32 v0, -v0, v27, v28 ; D2820000 24723700 v_mad_f32 v27, v0, v2, 0 ; D282001B 02020500 v_mad_f32 v27, v14, v3, v27 ; D282001B 046E070E v_mul_f32_e32 v28, v4, v13 ; 10381B04 v_mad_f32 v28, v14, v5, v28 ; D282001C 04720B0E v_mul_f32_e32 v14, v5, v14 ; 101C1D05 v_mad_f32 v28, v0, v2, v28 ; D282001C 04720500 v_mad_f32 v0, v0, v2, v14 ; D2820000 043A0500 v_mad_f32 v14, v19, v2, 0 ; D282000E 02020513 v_mad_f32 v3, v18, v3, v14 ; D2820003 043A0712 v_mul_f32_e32 v4, v4, v17 ; 10082304 v_mad_f32 v4, v18, v5, v4 ; D2820004 04120B12 v_mul_f32_e32 v5, v5, v18 ; 100A2505 v_mad_f32 v4, v19, v2, v4 ; D2820004 04120513 v_mad_f32 v2, v19, v2, v5 ; D2820002 04160513 s_buffer_load_dword s6, s[8:11], 0x13 ; C2030913 s_buffer_load_dword s7, s[8:11], 0x12 ; C2038912 v_mad_f32 v0, v13, v6, v0 ; D2820000 04020D0D v_mad_f32 v2, v17, v6, v2 ; D2820002 040A0D11 image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[20:27], s[16:19] ; F0800100 0085050A s_buffer_load_dword s14, s[8:11], 0x23 ; C2070923 s_buffer_load_dword s15, s[8:11], 0x25 ; C2078925 s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928 s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929 s_buffer_load_dword s8, s[8:11], 0x2a ; C204092A v_mov_b32_e32 v6, s13 ; 7E0C020D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s6, v1, v6 ; D2820001 041A0206 v_mov_b32_e32 v6, s12 ; 7E0C020C v_mad_f32 v6, s6, v15, v6 ; D2820006 041A1E06 v_mov_b32_e32 v10, s7 ; 7E140207 v_add_f32_e64 v11, 0, v27 clamp ; D206080B 00023680 v_add_f32_e64 v13, 0, v28 clamp ; D206080D 00023880 v_max_f32_e32 v11, 0x358637bd, v11 ; 201616FF 358637BD v_max_f32_e32 v13, 0x358637bd, v13 ; 201A1AFF 358637BD v_log_f32_e64 v11, |v11| ; D34E010B 0000010B v_log_f32_e64 v13, |v13| ; D34E010D 0000010D v_mad_f32 v10, s6, v16, v10 ; D282000A 042A2006 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e64 v14, 1.0, s15 ; D206000E 00001EF2 v_max_f32_e32 v11, v7, v11 ; 20161707 v_max_f32_e32 v13, v7, v13 ; 201A1B07 v_mul_f32_e32 v11, v14, v11 ; 1016170E v_mul_f32_e32 v13, v14, v13 ; 101A1B0E v_exp_f32_e32 v11, v11 ; 7E164B0B v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v11, v11, v20 ; 1016290B v_mad_f32 v11, v21, v13, v11 ; D282000B 042E1B15 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_max_f32_e32 v3, 0x358637bd, v3 ; 200606FF 358637BD v_mul_f32_e32 v3, v3, v20 ; 10062903 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_max_f32_e32 v4, 0x358637bd, v4 ; 200808FF 358637BD v_mad_f32 v3, v21, v4, v3 ; D2820003 040E0915 v_max_f32_e32 v0, v7, v0 ; 20000107 v_mul_f32_e32 v0, v14, v0 ; 1000010E v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v0, v22, v0, v11 ; D2820000 042E0116 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_max_f32_e32 v2, 0x358637bd, v2 ; 200404FF 358637BD v_mad_f32 v2, v22, v2, v3 ; D2820002 040E0516 v_mov_b32_e32 v3, s14 ; 7E06020E v_mul_f32_e32 v3, s5, v3 ; 10060605 v_mov_b32_e32 v4, s14 ; 7E08020E v_mul_f32_e32 v4, s4, v4 ; 10080804 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mul_f32_e32 v7, s3, v7 ; 100E0E03 v_mad_f32 v3, v5, v3, s0 ; D2820003 00020705 v_mad_f32 v4, v5, v4, s1 ; D2820004 00060905 v_mad_f32 v5, v5, v7, s2 ; D2820005 000A0F05 v_mul_f32_e32 v7, v24, v8 ; 100E1118 v_mad_f32 v3, v0, v7, v3 ; D2820003 040E0F00 v_mul_f32_e32 v7, v25, v9 ; 100E1319 v_mul_f32_e32 v8, v26, v23 ; 10102F1A v_mul_f32_e32 v9, v1, v24 ; 10123101 v_mul_f32_e32 v11, v6, v25 ; 10163306 v_mul_f32_e32 v13, v10, v26 ; 101A350A v_mad_f32 v3, v9, v2, v3 ; D2820003 040E0509 v_mad_f32 v4, v0, v7, v4 ; D2820004 04120F00 v_mad_f32 v4, v11, v2, v4 ; D2820004 0412050B v_mad_f32 v0, v0, v8, v5 ; D2820000 04161100 v_mad_f32 v0, v13, v2, v0 ; D2820000 0402050D v_mad_f32 v1, v1, s16, v3 ; D2820001 040C2101 v_mad_f32 v2, v6, s17, v4 ; D2820002 04102306 v_mad_f32 v0, v10, s8, v0 ; D2820000 0400110A v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v0, v12 ; 5E001900 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 32 Code Size: 1176 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..7] DCL TEMP[0..1] DCL TEMP[2], LOCAL IMM[0] FLT32 { 0.2209, 0.1138, 0.0102, 0.0000} IMM[1] FLT32 { 2.0000, 127.0000, 0.0000, -340282346638528859811704183484516925440.0000} IMM[2] FLT32 { 0.3390, 0.6780, 0.1130, 255.0000} IMM[3] FLT32 { 0.4184, 0.7319, 0.2969, 0.0039} 0: MUL TEMP[0].xyz, CONST[6], CONST[6].wwww 1: TEX TEMP[1], IN[0], SAMP[0], 2D 2: MUL TEMP[0].xyz, TEMP[0], TEMP[1].xxxx 3: MOV TEMP[1].x, CONST[7].xxxx 4: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0], CONST[0] 5: DP3 TEMP[1].x, TEMP[0], IMM[0] 6: DP3 TEMP[1].y, TEMP[0], IMM[2] 7: DP3 TEMP[1].z, TEMP[0], IMM[3] 8: MAX TEMP[0].xyz, TEMP[1], IMM[0].wwww 9: RCP TEMP[0].z, TEMP[0].zzzz 10: MUL OUT[0].xy, TEMP[0].zzzz, TEMP[0] 11: LG2 TEMP[2].x, |TEMP[0].yyyy| 12: MAX TEMP[0].x, IMM[1].wwww, TEMP[2].xxxx 13: MAD TEMP[0].y, TEMP[0].xxxx, IMM[1].xxxx, IMM[1].yyyy 14: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx 15: FRC TEMP[0].x, TEMP[0].xxxx 16: MUL TEMP[0].z, TEMP[0].xxxx, IMM[2].wwww 17: MOV OUT[0].w, TEMP[0].xxxx 18: FRC TEMP[0].x, TEMP[0].zzzz 19: ADD TEMP[0].x, -TEMP[0].xxxx, TEMP[0].zzzz 20: MAD TEMP[0].x, TEMP[0].xxxx, -IMM[3].wwww, TEMP[0].yyyy 21: MUL OUT[0].z, TEMP[0].xxxx, IMM[3].wwww 22: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %32 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %33 = load <32 x i8>, <32 x i8> addrspace(2)* %32, align 32, !tbaa !0 %34 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %38 = fmul float %27, %30 %39 = fmul float %28, %30 %40 = fmul float %29, %30 %41 = bitcast float %36 to i32 %42 = bitcast float %37 to i32 %43 = insertelement <2 x i32> undef, i32 %41, i32 0 %44 = insertelement <2 x i32> %43, i32 %42, i32 1 %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %33, <16 x i8> %35, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = fmul float %38, %46 %48 = fmul float %39, %46 %49 = fmul float %40, %46 %50 = fmul float %31, %47 %51 = fadd float %50, %24 %52 = fmul float %31, %48 %53 = fadd float %52, %25 %54 = fmul float %31, %49 %55 = fadd float %54, %26 %56 = fmul float %51, 0x3FCC467380000000 %57 = fmul float %53, 0x3FBD21FF20000000 %58 = fadd float %57, %56 %59 = fmul float %55, 0x3F84E3BCE0000000 %60 = fadd float %58, %59 %61 = fmul float %51, 0x3FD5B22D00000000 %62 = fmul float %53, 0x3FE5B22D00000000 %63 = fadd float %62, %61 %64 = fmul float %55, 0x3FBCED9160000000 %65 = fadd float %63, %64 %66 = fmul float %51, 0x3FDAC710C0000000 %67 = fmul float %53, 0x3FE76BB980000000 %68 = fadd float %67, %66 %69 = fmul float %55, 0x3FD30068E0000000 %70 = fadd float %68, %69 %71 = call float @llvm.maxnum.f32(float %60, float 0x3EB0C6F7A0000000) %72 = call float @llvm.maxnum.f32(float %65, float 0x3EB0C6F7A0000000) %73 = call float @llvm.maxnum.f32(float %70, float 0x3EB0C6F7A0000000) %74 = fdiv float 1.000000e+00, %73 %75 = fmul float %74, %71 %76 = fmul float %74, %72 %77 = call float @fabs(float %72) %78 = call float @llvm.log2.f32(float %77) %79 = call float @llvm.maxnum.f32(float %78, float 0xC7EFFFFFE0000000) %80 = fmul float %79, 2.000000e+00 %81 = fadd float %80, 1.270000e+02 %82 = fadd float %79, %79 %83 = call float @llvm.AMDIL.fraction.(float %82) %84 = fmul float %83, 2.550000e+02 %85 = call float @llvm.AMDIL.fraction.(float %84) %86 = fsub float %84, %85 %87 = fmul float %86, 0xBF70101020000000 %88 = fadd float %87, %81 %89 = fmul float %88, 0x3F70101020000000 %90 = call i32 @llvm.SI.packf16(float %75, float %76) %91 = bitcast i32 %90 to float %92 = call i32 @llvm.SI.packf16(float %89, float %83) %93 = bitcast i32 %92 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %91, float %93, float %91, float %93) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0xff7fffff ; 7E0402FF FF7FFFFF v_mov_b32_e32 v3, 0x437f0000 ; 7E0602FF 437F0000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s5, s[0:3], 0x1b ; C202811B s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_buffer_load_dword s6, s[0:3], 0x19 ; C2030119 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 s_buffer_load_dword s14, s[0:3], 0x2 ; C2070102 s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[8:11] ; F0800100 00440004 v_mov_b32_e32 v1, s5 ; 7E020205 v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mov_b32_e32 v4, s5 ; 7E080205 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mul_f32_e32 v4, s6, v4 ; 10080806 v_mul_f32_e32 v5, s4, v5 ; 100A0A04 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v0, v1 ; 10020300 v_mul_f32_e32 v4, v0, v4 ; 10080900 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mov_b32_e32 v5, s7 ; 7E0A0207 v_mad_f32 v1, v1, s0, v5 ; D2820001 04140101 v_mov_b32_e32 v5, s13 ; 7E0A020D v_mad_f32 v4, v4, s0, v5 ; D2820004 04140104 v_mov_b32_e32 v5, s14 ; 7E0A020E v_mad_f32 v0, v0, s0, v5 ; D2820000 04140100 v_mul_f32_e32 v5, 0x3e62339c, v1 ; 100A02FF 3E62339C v_madmk_f32_e32 v5, v4, v5, 0x3de90ff9 ; 400A0B04 3DE90FF9 v_mul_f32_e32 v6, 0x3ead9168, v1 ; 100C02FF 3EAD9168 v_madmk_f32_e32 v6, v4, v6, 0x3f2d9168 ; 400C0D04 3F2D9168 v_mul_f32_e32 v1, 0x3ed63886, v1 ; 100202FF 3ED63886 v_madmk_f32_e32 v1, v4, v1, 0x3f3b5dcc ; 40020304 3F3B5DCC v_madmk_f32_e32 v4, v0, v5, 0x3c271de7 ; 40080B00 3C271DE7 v_madmk_f32_e32 v5, v0, v6, 0x3de76c8b ; 400A0D00 3DE76C8B v_madmk_f32_e32 v0, v0, v1, 0x3e980347 ; 40000300 3E980347 v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_rcp_f32_e32 v0, v0 ; 7E005500 v_max_f32_e32 v1, 0x358637bd, v5 ; 20020AFF 358637BD v_log_f32_e64 v5, |v1| ; D34E0105 00000101 v_max_f32_e32 v4, 0x358637bd, v4 ; 200808FF 358637BD v_mul_f32_e32 v4, v4, v0 ; 10080104 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_max_f32_e32 v1, v5, v2 ; 20020505 v_madak_f32_e32 v2, 2.0, v1, 0x42fe0000 ; 420402F4 42FE0000 v_add_f32_e32 v5, v1, v1 ; 060A0301 v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v1, 2.0, v1, -v5 ; D2820001 841602F4 v_mul_f32_e32 v5, 0x437f0000, v1 ; 100A02FF 437F0000 v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v5, v1, v3, -v5 ; D2820005 84160701 v_mad_f32 v3, v1, v3, -v5 ; D2820003 84160701 v_madmk_f32_e32 v2, v3, v2, 0xbb808081 ; 40040503 BB808081 v_mul_f32_e32 v2, 0x3b808081, v2 ; 100404FF 3B808081 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 384 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..6] DCL TEMP[0] 0: MOV TEMP[0].xyz, CONST[6] 1: MAD OUT[0].xyz, IN[1], TEMP[0], CONST[0] 2: TEX TEMP[0], IN[0], SAMP[0], 2D 3: MUL TEMP[0].x, TEMP[0].xxxx, CONST[6].wwww 4: MUL OUT[0].w, TEMP[0].xxxx, IN[1].wwww 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %41 = fmul float %37, %27 %42 = fadd float %41, %24 %43 = fmul float %38, %28 %44 = fadd float %43, %25 %45 = fmul float %39, %29 %46 = fadd float %45, %26 %47 = bitcast float %35 to i32 %48 = bitcast float %36 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %32, <16 x i8> %34, i32 2) %52 = extractelement <4 x float> %51, i32 0 %53 = fmul float %52, %30 %54 = fmul float %53, %40 %55 = call i32 @llvm.SI.packf16(float %42, float %44) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %46, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_buffer_load_dword s13, s[0:3], 0x1a ; C206811A s_buffer_load_dword s0, s[0:3], 0x1b ; C200011B v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[16:19] ; F0800100 00850102 v_mov_b32_e32 v2, s8 ; 7E040208 v_mad_f32 v2, s11, v4, v2 ; D2820002 040A080B v_mov_b32_e32 v3, s9 ; 7E060209 v_mad_f32 v3, s12, v5, v3 ; D2820003 040E0A0C v_mov_b32_e32 v4, s10 ; 7E08020A v_mad_f32 v4, s13, v6, v4 ; D2820004 04120C0D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 180 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], TEXCOORD[0], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..9] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..3] IMM[0] FLT32 { -0.0000, 0.0000, 340282346638528859811704183484516925440.0000, 0.0000} 0: ADD TEMP[0].x, CONST[8].zzzz, IN[1].xxxx 1: ADD TEMP[0].y, CONST[9].xxxx, IN[1].yyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: MUL TEMP[0].x, TEMP[0].xxxx, CONST[6].wwww 4: MUL TEMP[0].x, TEMP[0].xxxx, CONST[7].wwww 5: MUL TEMP[0].x, TEMP[0].xxxx, IN[0].xxxx 6: DP3 TEMP[0].y, IN[2], IN[2] 7: RSQ TEMP[1], |TEMP[0].yyyy| 8: MIN TEMP[0].y, IMM[0].zzzz, TEMP[1] 9: MUL_SAT TEMP[0].y, TEMP[0].yyyy, IN[2].zzzz 10: POW TEMP[2].x, |TEMP[0].yyyy|, CONST[9].yyyy 11: ADD TEMP[0].y, TEMP[0].yyyy, IMM[0].xxxx 12: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 13: MOV TEMP[2].xyz, CONST[6] 14: MOV TEMP[3].xyz, CONST[7] 15: MAD TEMP[2].xyz, TEMP[2], TEMP[3], CONST[0] 16: MUL TEMP[0].xzw, TEMP[0].xxxx, TEMP[2].xyyz 17: CMP OUT[0].xyz, TEMP[0].yyyy, IMM[0].yyyy, TEMP[0].xzww 18: MOV OUT[0].w, IMM[0].yyyy 19: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %38 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %39 = load <32 x i8>, <32 x i8> addrspace(2)* %38, align 32, !tbaa !0 %40 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %48 = fadd float %35, %43 %49 = fadd float %36, %44 %50 = bitcast float %48 to i32 %51 = bitcast float %49 to i32 %52 = insertelement <2 x i32> undef, i32 %50, i32 0 %53 = insertelement <2 x i32> %52, i32 %51, i32 1 %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %39, <16 x i8> %41, i32 2) %55 = extractelement <4 x float> %54, i32 0 %56 = fmul float %55, %30 %57 = fmul float %56, %34 %58 = fmul float %57, %42 %59 = fmul float %45, %45 %60 = fmul float %46, %46 %61 = fadd float %60, %59 %62 = fmul float %47, %47 %63 = fadd float %61, %62 %64 = call float @fabs(float %63) %65 = call float @llvm.AMDGPU.rsq.clamped.f32(float %64) %66 = call float @llvm.minnum.f32(float %65, float 0x47EFFFFFE0000000) %67 = fmul float %66, %47 %68 = call float @llvm.AMDIL.clamp.(float %67, float 0.000000e+00, float 1.000000e+00) %69 = call float @fabs(float %68) %70 = call float @llvm.pow.f32(float %69, float %37) %71 = fadd float %68, 0xBEB0C6F7A0000000 %72 = fmul float %58, %70 %73 = fmul float %27, %31 %74 = fadd float %73, %24 %75 = fmul float %28, %32 %76 = fadd float %75, %25 %77 = fmul float %29, %33 %78 = fadd float %77, %26 %79 = fmul float %72, %74 %80 = fmul float %72, %76 %81 = fmul float %72, %78 %82 = call float @llvm.AMDGPU.cndlt(float %71, float 0.000000e+00, float %79) %83 = call float @llvm.AMDGPU.cndlt(float %71, float 0.000000e+00, float %80) %84 = call float @llvm.AMDGPU.cndlt(float %71, float 0.000000e+00, float %81) %85 = call i32 @llvm.SI.packf16(float %82, float %83) %86 = bitcast i32 %85 to float %87 = call i32 @llvm.SI.packf16(float %84, float 0.000000e+00) %88 = bitcast i32 %87 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %86, float %88, float %86, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_buffer_load_dword s5, s[0:3], 0x24 ; C2028124 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v0, v0, 2, 2, [m0] ; C8000A00 v_interp_p2_f32 v0, [v0], v1, 2, 2, [m0] ; C8010A01 s_buffer_load_dword s6, s[0:3], 0x25 ; C2030125 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s4, v3 ; 060E0604 v_add_f32_e32 v8, s5, v4 ; 06100805 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[12:19], s[8:11] ; F0800100 00430107 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s8, s[0:3], 0x18 ; C2040118 s_buffer_load_dword s9, s[0:3], 0x1c ; C204811C s_buffer_load_dword s10, s[0:3], 0x1a ; C205011A s_buffer_load_dword s11, s[0:3], 0x1b ; C205811B s_buffer_load_dword s12, s[0:3], 0x1f ; C206011F v_mul_f32_e32 v3, v5, v5 ; 10060B05 v_mad_f32 v3, v6, v6, v3 ; D2820003 040E0D06 s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 v_mad_f32 v3, v0, v0, v3 ; D2820003 040E0100 v_rsq_clamp_f32_e64 v3, |v3| ; D3580103 00000103 s_buffer_load_dword s14, s[0:3], 0x1d ; C207011D s_buffer_load_dword s0, s[0:3], 0x1e ; C200011E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s9 ; 7E080209 v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v4, s8, v4, v5 ; D2820004 04160808 v_min_f32_e32 v3, 0x7f7fffff, v3 ; 1E0606FF 7F7FFFFF v_mul_f32_e32 v0, v0, v3 ; 10000700 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v3, 0x7fffffff, v0 ; 360600FF 7FFFFFFF v_log_f32_e32 v3, v3 ; 7E064F03 v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_legacy_f32_e32 v2, s6, v3 ; 0E040606 v_mov_b32_e32 v3, 0xb58637bd ; 7E0602FF B58637BD v_add_f32_e32 v0, v0, v3 ; 06000700 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mov_b32_e32 v2, s14 ; 7E04020E v_mov_b32_e32 v3, s5 ; 7E060205 v_mad_f32 v2, s13, v2, v3 ; D2820002 040E040D v_mov_b32_e32 v3, s0 ; 7E060200 v_mov_b32_e32 v5, s7 ; 7E0A0207 v_mad_f32 v3, s10, v3, v5 ; D2820003 0416060A v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mul_f32_e32 v2, v2, v1 ; 10040302 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v4, 0, vcc ; D2000000 01A90104 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..10] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, -0.0000} IMM[1] FLT32 { 0.3000, 0.5900, 0.1100, 0.0000} IMM[2] FLT32 { 15.0000, 0.9151, 0.0000, 340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].wwww, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].wwww, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: TEX TEMP[3], IN[0], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[2].wwww, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[5].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[5].xyz, TEMP[5], IMM[0].xxxx, -TEMP[1] 17: DP3 TEMP[1].w, IN[2], IN[2] 18: RSQ TEMP[0], |TEMP[1].wwww| 19: MIN TEMP[2].w, IMM[2].wwww, TEMP[0] 20: MUL TEMP[6].xyz, TEMP[2].wwww, IN[2] 21: DP3 TEMP[2].w, TEMP[6], -CONST[10] 22: ADD TEMP[2].w, TEMP[2].wwww, -CONST[11].xxxx 23: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[11].yyyy 24: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 25: ADD TEMP[1].w, -TEMP[1].wwww, IMM[0].zzzz 26: MUL TEMP[6], TEMP[2].wwww, TEMP[1].wwww 27: MOV TEMP[7], TEMP[6] 28: KILL_IF TEMP[7] 29: UIF CONST[240].xxxx :0 30: RCP TEMP[3].w, IN[4].wwww 31: MUL TEMP[6].xy, TEMP[3].wwww, IN[4] 32: MAD TEMP[6].xy, TEMP[6], CONST[1], CONST[1].wzzw 33: TEX TEMP[7], TEMP[6], SAMP[0], 2D 34: ELSE :36 35: MOV TEMP[7].xyz, IMM[0].zzzz 36: ENDIF 37: MOV TEMP[6].z, IMM[0].zzzz 38: ADD TEMP[6].xyz, TEMP[6].zzzz, -CONST[0] 39: TEX TEMP[8], IN[0], SAMP[2], 2D 40: DP3 TEMP[3].w, TEMP[8], IMM[1] 41: LRP TEMP[9].xyz, CONST[7].xxxx, TEMP[3].wwww, TEMP[8] 42: DP3_SAT TEMP[1].x, TEMP[3], TEMP[1] 43: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].wwww 44: POW TEMP[3].x, |TEMP[1].xxxx|, CONST[7].yyyy 45: CMP TEMP[1].x, TEMP[1].yyyy, IMM[1].wwww, TEMP[3].xxxx 46: MUL TEMP[3].xyz, TEMP[1].xxxx, CONST[6] 47: TEX TEMP[10], IN[0], SAMP[3], 2D 48: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].yyyy 49: MAD TEMP[3].xyz, TEMP[9], TEMP[3], -TEMP[8] 50: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[3], TEMP[8] 51: DP3 TEMP[3].x, TEMP[1], IMM[1] 52: LRP TEMP[8].xyz, CONST[7].zzzz, TEMP[3].xxxx, TEMP[1] 53: MUL TEMP[1].xyz, TEMP[8], CONST[7].wwww 54: MUL TEMP[1].xyz, TEMP[6], TEMP[1] 55: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 56: MAX TEMP[3].x, TEMP[1].wwww, IMM[1].wwww 57: ADD TEMP[1].w, TEMP[3].xxxx, IMM[0].wwww 58: POW TEMP[4].w, |TEMP[3].xxxx|, CONST[9].wwww 59: CMP TEMP[1].w, TEMP[1].wwww, IMM[1].wwww, TEMP[4].wwww 60: TEX TEMP[3], IN[0], SAMP[4], 2D 61: MUL TEMP[3].xyz, TEMP[3], CONST[8].xxxx 62: MUL TEMP[3].xyz, TEMP[3], CONST[7].wwww 63: MAD TEMP[3].xyz, TEMP[3], CONST[5].wwww, CONST[5] 64: DP3_SAT TEMP[3].w, TEMP[4], TEMP[2] 65: ADD TEMP[4].x, TEMP[3].wwww, IMM[0].wwww 66: CMP TEMP[3].w, TEMP[4].xxxx, IMM[1].wwww, TEMP[3].wwww 67: DP3_SAT TEMP[2].x, TEMP[5], TEMP[2] 68: ADD TEMP[2].y, TEMP[2].xxxx, IMM[0].wwww 69: POW TEMP[4].x, |TEMP[2].xxxx|, IMM[2].xxxx 70: MUL TEMP[2].x, TEMP[4].xxxx, IMM[2].yyyy 71: MUL TEMP[3].xyz, TEMP[3], TEMP[2].xxxx 72: CMP TEMP[2].xyz, TEMP[2].yyyy, IMM[1].wwww, TEMP[3] 73: MAD TEMP[1].xyz, TEMP[1], TEMP[3].wwww, TEMP[2] 74: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 75: MUL TEMP[1].xyz, TEMP[7], TEMP[1] 76: MUL TEMP[1].xyz, TEMP[1], CONST[9] 77: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 78: CMP OUT[0].xyz, -TEMP[6].wwww, TEMP[1], IMM[1].wwww 79: MOV OUT[0].w, IMM[1].wwww 80: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %76 = bitcast <8 x i32> addrspace(2)* %75 to <32 x i8> addrspace(2)* %77 = load <32 x i8>, <32 x i8> addrspace(2)* %76, align 32, !tbaa !0 %78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %79 = bitcast <4 x i32> addrspace(2)* %78 to <16 x i8> addrspace(2)* %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %92 = fmul float %89, %89 %93 = fmul float %90, %90 %94 = fadd float %93, %92 %95 = fmul float %91, %91 %96 = fadd float %94, %95 %97 = call float @llvm.AMDGPU.rsq.clamped.f32(float %96) %98 = call float @llvm.minnum.f32(float %97, float 0x47EFFFFFE0000000) %99 = fmul float %89, %98 %100 = fmul float %90, %98 %101 = fmul float %91, %98 %102 = fmul float %83, %83 %103 = fmul float %84, %84 %104 = fadd float %103, %102 %105 = fmul float %85, %85 %106 = fadd float %104, %105 %107 = call float @llvm.AMDGPU.rsq.clamped.f32(float %106) %108 = call float @llvm.minnum.f32(float %107, float 0x47EFFFFFE0000000) %109 = fmul float %83, %108 %110 = fmul float %84, %108 %111 = fmul float %85, %108 %112 = bitcast float %81 to i32 %113 = bitcast float %82 to i32 %114 = insertelement <2 x i32> undef, i32 %112, i32 0 %115 = insertelement <2 x i32> %114, i32 %113, i32 1 %116 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %115, <32 x i8> %59, <16 x i8> %62, i32 2) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = extractelement <4 x float> %116, i32 2 %120 = fmul float %117, 2.000000e+00 %121 = fadd float %120, -1.000000e+00 %122 = fmul float %118, 2.000000e+00 %123 = fadd float %122, -1.000000e+00 %124 = fmul float %119, 2.000000e+00 %125 = fadd float %124, -1.000000e+00 %126 = fmul float %121, %121 %127 = fmul float %123, %123 %128 = fadd float %127, %126 %129 = fmul float %125, %125 %130 = fadd float %128, %129 %131 = call float @llvm.AMDGPU.rsq.clamped.f32(float %130) %132 = call float @llvm.minnum.f32(float %131, float 0x47EFFFFFE0000000) %133 = fmul float %121, %132 %134 = fmul float %123, %132 %135 = fmul float %125, %132 %136 = fmul float %133, %99 %137 = fmul float %134, %100 %138 = fadd float %137, %136 %139 = fmul float %135, %101 %140 = fadd float %138, %139 %141 = fmul float %140, %133 %142 = fmul float %140, %134 %143 = fmul float %140, %135 %144 = fmul float %141, 2.000000e+00 %145 = fsub float %144, %99 %146 = fmul float %142, 2.000000e+00 %147 = fsub float %146, %100 %148 = fmul float %143, 2.000000e+00 %149 = fsub float %148, %101 %150 = fmul float %86, %86 %151 = fmul float %87, %87 %152 = fadd float %151, %150 %153 = fmul float %88, %88 %154 = fadd float %152, %153 %155 = call float @fabs(float %154) %156 = call float @llvm.AMDGPU.rsq.clamped.f32(float %155) %157 = call float @llvm.minnum.f32(float %156, float 0x47EFFFFFE0000000) %158 = fmul float %157, %86 %159 = fmul float %157, %87 %160 = fmul float %157, %88 %161 = fmul float %47, %158 %162 = fsub float -0.000000e+00, %161 %163 = fmul float %48, %159 %164 = fsub float %162, %163 %165 = fmul float %49, %160 %166 = fsub float %164, %165 %167 = fsub float %166, %50 %168 = fmul float %167, %51 %169 = call float @llvm.AMDIL.clamp.(float %168, float 0.000000e+00, float 1.000000e+00) %170 = fmul float %169, %169 %171 = fsub float 1.000000e+00, %154 %172 = fmul float %170, %171 %173 = fmul float %170, %171 %174 = fmul float %170, %171 %175 = fmul float %170, %171 %176 = fcmp olt float %172, 0.000000e+00 %177 = fcmp olt float %173, 0.000000e+00 %178 = fcmp olt float %174, 0.000000e+00 %179 = fcmp olt float %175, 0.000000e+00 %180 = or i1 %179, %178 %181 = or i1 %180, %177 %182 = or i1 %181, %176 %183 = select i1 %182, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %183) %184 = bitcast float %52 to i32 %185 = icmp eq i32 %184, 0 br i1 %185, label %ENDIF, label %IF IF: ; preds = %main_body %186 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %187 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %188 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %189 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %190 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %191 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %192 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %193 = fdiv float 1.000000e+00, %186 %194 = fmul float %193, %188 %195 = fmul float %193, %187 %196 = fmul float %194, %192 %197 = fadd float %196, %189 %198 = fmul float %195, %191 %199 = fadd float %198, %190 %200 = bitcast float %197 to i32 %201 = bitcast float %199 to i32 %202 = insertelement <2 x i32> undef, i32 %200, i32 0 %203 = insertelement <2 x i32> %202, i32 %201, i32 1 %204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %203, <32 x i8> %54, <16 x i8> %56, i32 2) %205 = extractelement <4 x float> %204, i32 0 %206 = extractelement <4 x float> %204, i32 1 %207 = extractelement <4 x float> %204, i32 2 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp28.0 = phi float [ %205, %IF ], [ 1.000000e+00, %main_body ] %temp29.0 = phi float [ %206, %IF ], [ 1.000000e+00, %main_body ] %temp30.0 = phi float [ %207, %IF ], [ 1.000000e+00, %main_body ] %208 = fsub float 1.000000e+00, %24 %209 = fsub float 1.000000e+00, %25 %210 = fsub float 1.000000e+00, %26 %211 = bitcast float %81 to i32 %212 = bitcast float %82 to i32 %213 = insertelement <2 x i32> undef, i32 %211, i32 0 %214 = insertelement <2 x i32> %213, i32 %212, i32 1 %215 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %214, <32 x i8> %65, <16 x i8> %68, i32 2) %216 = extractelement <4 x float> %215, i32 0 %217 = extractelement <4 x float> %215, i32 1 %218 = extractelement <4 x float> %215, i32 2 %219 = fmul float %216, 0x3FD3333340000000 %220 = fmul float %217, 0x3FE2E147A0000000 %221 = fadd float %220, %219 %222 = fmul float %218, 0x3FBC28F5C0000000 %223 = fadd float %221, %222 %224 = call float @llvm.AMDGPU.lrp(float %38, float %223, float %216) %225 = call float @llvm.AMDGPU.lrp(float %38, float %223, float %217) %226 = call float @llvm.AMDGPU.lrp(float %38, float %223, float %218) %227 = fmul float %121, %99 %228 = fmul float %123, %100 %229 = fadd float %228, %227 %230 = fmul float %125, %101 %231 = fadd float %229, %230 %232 = call float @llvm.AMDIL.clamp.(float %231, float 0.000000e+00, float 1.000000e+00) %233 = fadd float %232, 0xBEB0C6F7A0000000 %234 = call float @fabs(float %232) %235 = call float @llvm.pow.f32(float %234, float %39) %236 = call float @llvm.AMDGPU.cndlt(float %233, float 0.000000e+00, float %235) %237 = fmul float %236, %35 %238 = fmul float %236, %36 %239 = fmul float %236, %37 %240 = bitcast float %81 to i32 %241 = bitcast float %82 to i32 %242 = insertelement <2 x i32> undef, i32 %240, i32 0 %243 = insertelement <2 x i32> %242, i32 %241, i32 1 %244 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %243, <32 x i8> %71, <16 x i8> %74, i32 2) %245 = extractelement <4 x float> %244, i32 1 %246 = fmul float %236, %245 %247 = fmul float %224, %237 %248 = fsub float %247, %216 %249 = fmul float %225, %238 %250 = fsub float %249, %217 %251 = fmul float %226, %239 %252 = fsub float %251, %218 %253 = fmul float %246, %248 %254 = fadd float %253, %216 %255 = fmul float %246, %250 %256 = fadd float %255, %217 %257 = fmul float %246, %252 %258 = fadd float %257, %218 %259 = fmul float %254, 0x3FD3333340000000 %260 = fmul float %256, 0x3FE2E147A0000000 %261 = fadd float %260, %259 %262 = fmul float %258, 0x3FBC28F5C0000000 %263 = fadd float %261, %262 %264 = call float @llvm.AMDGPU.lrp(float %40, float %263, float %254) %265 = call float @llvm.AMDGPU.lrp(float %40, float %263, float %256) %266 = call float @llvm.AMDGPU.lrp(float %40, float %263, float %258) %267 = fmul float %264, %41 %268 = fmul float %265, %41 %269 = fmul float %266, %41 %270 = fmul float %208, %267 %271 = fmul float %209, %268 %272 = fmul float %210, %269 %273 = fmul float %270, %30 %274 = fadd float %273, %27 %275 = fmul float %271, %30 %276 = fadd float %275, %28 %277 = fmul float %272, %30 %278 = fadd float %277, %29 %279 = call float @llvm.maxnum.f32(float %171, float 0.000000e+00) %280 = fadd float %279, 0xBEB0C6F7A0000000 %281 = call float @fabs(float %279) %282 = call float @llvm.pow.f32(float %281, float %46) %283 = call float @llvm.AMDGPU.cndlt(float %280, float 0.000000e+00, float %282) %284 = bitcast float %81 to i32 %285 = bitcast float %82 to i32 %286 = insertelement <2 x i32> undef, i32 %284, i32 0 %287 = insertelement <2 x i32> %286, i32 %285, i32 1 %288 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %287, <32 x i8> %77, <16 x i8> %80, i32 2) %289 = extractelement <4 x float> %288, i32 0 %290 = extractelement <4 x float> %288, i32 1 %291 = extractelement <4 x float> %288, i32 2 %292 = fmul float %289, %42 %293 = fmul float %290, %42 %294 = fmul float %291, %42 %295 = fmul float %292, %41 %296 = fmul float %293, %41 %297 = fmul float %294, %41 %298 = fmul float %295, %34 %299 = fadd float %298, %31 %300 = fmul float %296, %34 %301 = fadd float %300, %32 %302 = fmul float %297, %34 %303 = fadd float %302, %33 %304 = fmul float %133, %109 %305 = fmul float %134, %110 %306 = fadd float %305, %304 %307 = fmul float %135, %111 %308 = fadd float %306, %307 %309 = call float @llvm.AMDIL.clamp.(float %308, float 0.000000e+00, float 1.000000e+00) %310 = fadd float %309, 0xBEB0C6F7A0000000 %311 = call float @llvm.AMDGPU.cndlt(float %310, float 0.000000e+00, float %309) %312 = fmul float %145, %109 %313 = fmul float %147, %110 %314 = fadd float %313, %312 %315 = fmul float %149, %111 %316 = fadd float %314, %315 %317 = call float @llvm.AMDIL.clamp.(float %316, float 0.000000e+00, float 1.000000e+00) %318 = fadd float %317, 0xBEB0C6F7A0000000 %319 = call float @fabs(float %317) %320 = call float @llvm.pow.f32(float %319, float 1.500000e+01) %321 = fmul float %320, 0x3FED48D5A0000000 %322 = fmul float %299, %321 %323 = fmul float %301, %321 %324 = fmul float %303, %321 %325 = call float @llvm.AMDGPU.cndlt(float %318, float 0.000000e+00, float %322) %326 = call float @llvm.AMDGPU.cndlt(float %318, float 0.000000e+00, float %323) %327 = call float @llvm.AMDGPU.cndlt(float %318, float 0.000000e+00, float %324) %328 = fmul float %274, %311 %329 = fadd float %328, %325 %330 = fmul float %276, %311 %331 = fadd float %330, %326 %332 = fmul float %278, %311 %333 = fadd float %332, %327 %334 = fmul float %283, %329 %335 = fmul float %283, %331 %336 = fmul float %283, %333 %337 = fmul float %temp28.0, %334 %338 = fmul float %temp29.0, %335 %339 = fmul float %temp30.0, %336 %340 = fmul float %337, %43 %341 = fmul float %338, %44 %342 = fmul float %339, %45 %343 = fmul float %170, %340 %344 = fmul float %170, %341 %345 = fmul float %170, %342 %346 = fsub float -0.000000e+00, %175 %347 = call float @llvm.AMDGPU.cndlt(float %346, float %343, float 0.000000e+00) %348 = fsub float -0.000000e+00, %175 %349 = call float @llvm.AMDGPU.cndlt(float %348, float %344, float 0.000000e+00) %350 = fsub float -0.000000e+00, %175 %351 = call float @llvm.AMDGPU.cndlt(float %350, float %345, float 0.000000e+00) %352 = call i32 @llvm.SI.packf16(float %347, float %349) %353 = bitcast i32 %352 to float %354 = call i32 @llvm.SI.packf16(float %351, float 0.000000e+00) %355 = bitcast i32 %354 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %353, float %355, float %353, float %355) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v3, v0, 2, 1, [m0] ; C80C0600 v_interp_p2_f32 v3, [v3], v1, 2, 1, [m0] ; C80D0601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00 v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 s_load_dwordx4 s[24:27], s[2:3], 0x0 ; C08C0300 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 v_interp_p1_f32 v11, v0, 2, 3, [m0] ; C82C0E00 v_interp_p2_f32 v11, [v11], v1, 2, 3, [m0] ; C82D0E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[12:19], s[8:11] ; F0800700 00430D05 v_mul_f32_e32 v16, v7, v7 ; 10200F07 v_mad_f32 v16, v8, v8, v16 ; D2820010 04421108 v_mad_f32 v16, v12, v12, v16 ; D2820010 0442190C s_buffer_load_dword s0, s[24:27], 0x28 ; C2001928 s_buffer_load_dword s1, s[24:27], 0x29 ; C2009929 s_buffer_load_dword s2, s[24:27], 0x2a ; C201192A s_buffer_load_dword s3, s[24:27], 0x2c ; C201992C s_buffer_load_dword s8, s[24:27], 0x2d ; C204192D v_rsq_clamp_f32_e64 v17, |v16| ; D3580111 00000110 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, 2.0, v13, -1.0 ; D2820012 03CE1AF4 v_mad_f32 v19, 2.0, v14, -1.0 ; D2820013 03CE1CF4 v_mad_f32 v20, 2.0, v15, -1.0 ; D2820014 03CE1EF4 v_min_f32_e32 v13, 0x7f7fffff, v17 ; 1E1A22FF 7F7FFFFF v_mul_f32_e32 v7, v7, v13 ; 100E1B07 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v7 ; 100E0E00 v_mul_f32_e32 v8, v8, v13 ; 10101B08 v_mad_f32 v7, -s1, v8, -v7 ; D2820007 A41E1001 v_mul_f32_e32 v8, v12, v13 ; 10101B0C v_mad_f32 v7, -s2, v8, v7 ; D2820007 241E1002 v_subrev_f32_e32 v7, s3, v7 ; 0A0E0E03 v_mul_f32_e32 v7, s8, v7 ; 100E0E08 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_sub_f32_e32 v12, 1.0, v16 ; 081820F2 v_mul_f32_e32 v8, v12, v7 ; 10100F0C v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_cndmask_b32_e64 v13, 0, -1.0, vcc ; D200000D 01A9E680 v_cndmask_b32_e64 v13, v13, -1.0, vcc ; D200000D 01A9E70D v_cndmask_b32_e64 v13, v13, -1.0, vcc ; D200000D 01A9E70D v_cndmask_b32_e64 v14, v13, -1.0, vcc ; D200000E 01A9E70D s_movk_i32 s0, 0xf00 ; B0000F00 s_buffer_load_dword s29, s[24:27], s0 ; C20E9800 v_mul_f32_e32 v13, v9, v9 ; 101A1309 v_mad_f32 v13, v10, v10, v13 ; D282000D 0436150A v_mad_f32 v13, v11, v11, v13 ; D282000D 0436170B v_rsq_clamp_f32_e32 v21, v13 ; 7E2A590D v_mul_f32_e32 v13, v2, v2 ; 101A0502 v_mad_f32 v13, v4, v4, v13 ; D282000D 04360904 v_mad_f32 v13, v3, v3, v13 ; D282000D 04360703 v_rsq_clamp_f32_e32 v22, v13 ; 7E2C590D s_buffer_load_dword s1, s[24:27], 0x25 ; C2009925 s_buffer_load_dword s0, s[24:27], 0x26 ; C2001926 s_buffer_load_dword s3, s[24:27], 0x27 ; C2019927 s_buffer_load_dword s16, s[24:27], 0x0 ; C2081900 s_buffer_load_dword s15, s[24:27], 0x1 ; C2079901 s_buffer_load_dword s14, s[24:27], 0x2 ; C2071902 s_buffer_load_dword s11, s[24:27], 0x10 ; C2059910 s_buffer_load_dword s12, s[24:27], 0x11 ; C2061911 s_buffer_load_dword s13, s[24:27], 0x12 ; C2069912 s_buffer_load_dword s30, s[24:27], 0x13 ; C20F1913 s_buffer_load_dword s8, s[24:27], 0x14 ; C2041914 s_buffer_load_dword s9, s[24:27], 0x15 ; C2049915 s_buffer_load_dword s10, s[24:27], 0x16 ; C2051916 s_buffer_load_dword s31, s[24:27], 0x17 ; C20F9917 s_buffer_load_dword s22, s[24:27], 0x18 ; C20B1918 s_buffer_load_dword s21, s[24:27], 0x19 ; C20A9919 s_buffer_load_dword s20, s[24:27], 0x1a ; C20A191A s_buffer_load_dword s23, s[24:27], 0x1c ; C20B991C s_buffer_load_dword s28, s[24:27], 0x1d ; C20E191D s_buffer_load_dword s18, s[24:27], 0x1e ; C209191E s_buffer_load_dword s17, s[24:27], 0x1f ; C208991F s_buffer_load_dword s19, s[24:27], 0x20 ; C2099920 s_buffer_load_dword s2, s[24:27], 0x24 ; C2011924 v_mul_f32_e32 v13, v18, v18 ; 101A2512 v_mad_f32 v13, v19, v19, v13 ; D282000D 04362713 v_mad_f32 v13, v20, v20, v13 ; D282000D 04362914 v_rsq_clamp_f32_e32 v23, v13 ; 7E2E590D v_mov_b32_e32 v13, 1.0 ; 7E1A02F2 v_cmpx_le_f32_e32 vcc, 0, v14 ; 7C261C80 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v14, 1.0 ; 7E1C02F2 v_mov_b32_e32 v15, 1.0 ; 7E1E02F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[32:33], 0, s29 ; D10A0020 00003A80 v_mov_b32_e32 v17, s30 ; 7E22021E v_mov_b32_e32 v16, s31 ; 7E20021F s_and_saveexec_b64 s[30:31], s[32:33] ; BE9E2420 s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx4 s[40:43], s[4:5], 0x0 ; C0940500 v_interp_p1_f32 v13, v0, 3, 4, [m0] ; C8341300 s_buffer_load_dword s29, s[24:27], 0x6 ; C20E9906 s_buffer_load_dword s44, s[24:27], 0x7 ; C2161907 s_buffer_load_dword s45, s[24:27], 0x4 ; C2169904 s_buffer_load_dword s46, s[24:27], 0x5 ; C2171905 v_interp_p2_f32 v13, [v13], v1, 3, 4, [m0] ; C8351301 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_rcp_f32_e32 v13, v13 ; 7E1A550D v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v0, v0, 0, 4, [m0] ; C8001000 v_interp_p2_f32 v0, [v0], v1, 0, 4, [m0] ; C8011001 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mul_f32_e32 v1, v14, v13 ; 10021B0E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s44 ; 7E1A022C v_mad_f32 v13, s45, v0, v13 ; D282000D 0436002D v_mov_b32_e32 v0, s29 ; 7E00021D v_mad_f32 v14, s46, v1, v0 ; D282000E 0402022E image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[32:39], s[40:43] ; F0800700 01480D0D s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[30:31] ; 88FE1E7E v_min_f32_e32 v0, 0x7f7fffff, v21 ; 1E002AFF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v22 ; 1E022CFF 7F7FFFFF v_min_f32_e32 v21, 0x7f7fffff, v23 ; 1E2A2EFF 7F7FFFFF s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 v_mul_f32_e32 v22, v0, v9 ; 102C1300 v_mul_f32_e32 v23, v0, v10 ; 102E1500 v_mul_f32_e32 v24, v22, v18 ; 10302516 v_mad_f32 v24, v19, v23, v24 ; D2820018 04622F13 v_mul_f32_e32 v25, v0, v11 ; 10321700 v_mad_f32 v24, v20, v25, v24 ; D2820018 04623314 v_add_f32_e64 v24, 0, v24 clamp ; D2060818 00023080 v_mov_b32_e32 v26, 0x7fffffff ; 7E3402FF 7FFFFFFF v_and_b32_e32 v27, v24, v26 ; 36363518 v_log_f32_e32 v27, v27 ; 7E364F1B s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718 s_load_dwordx8 s[56:63], s[6:7], 0x20 ; C0DC0720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[40:47], s[36:39] ; F0800700 012A1C05 v_mul_legacy_f32_e32 v27, s28, v27 ; 0E36361C v_mov_b32_e32 v31, 0xb58637bd ; 7E3E02FF B58637BD v_add_f32_e32 v24, v31, v24 ; 0630311F v_exp_f32_e32 v27, v27 ; 7E364B1B v_cmp_gt_f32_e32 vcc, 0, v24 ; 7C083080 v_cndmask_b32_e64 v24, v27, 0, vcc ; D2000018 01A9011B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v27, 0x3e99999a, v28 ; 103638FF 3E99999A v_mov_b32_e32 v32, 0x3f170a3d ; 7E4002FF 3F170A3D v_mad_f32 v27, v29, v32, v27 ; D282001B 046E411D v_mov_b32_e32 v33, 0x3de147ae ; 7E4202FF 3DE147AE v_mad_f32 v27, v30, v33, v27 ; D282001B 046E431E v_sub_f32_e64 v34, 1.0, s23 ; D2080022 00002EF2 v_mul_f32_e32 v35, v28, v34 ; 1046451C v_mad_f32 v35, s23, v27, v35 ; D2820023 048E3617 v_mul_f32_e32 v36, v29, v34 ; 1048451D v_mad_f32 v36, s23, v27, v36 ; D2820024 04923617 v_mul_f32_e32 v34, v30, v34 ; 1044451E v_mad_f32 v27, s23, v27, v34 ; D282001B 048A3617 v_mul_f32_e32 v34, s22, v24 ; 10443016 v_mad_f32 v34, v35, v34, -v28 ; D2820022 84724523 v_mul_f32_e32 v35, s21, v24 ; 10463015 v_mad_f32 v35, v36, v35, -v29 ; D2820023 84764724 image_sample v36, 2, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[48:55], s[32:35] ; F0800200 010C2405 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v36, v36, v24 ; 10483124 v_mul_f32_e32 v24, s20, v24 ; 10303014 v_mad_f32 v24, v27, v24, -v30 ; D2820018 847A311B v_mad_f32 v27, v36, v34, v28 ; D282001B 04724524 v_mad_f32 v28, v36, v35, v29 ; D282001C 04764724 v_mad_f32 v24, v36, v24, v30 ; D2820018 047A3124 v_mul_f32_e32 v18, v21, v18 ; 10242515 v_mul_f32_e32 v19, v21, v19 ; 10262715 v_mul_f32_e32 v20, v21, v20 ; 10282915 v_mul_f32_e32 v21, v22, v18 ; 102A2516 v_mad_f32 v21, v19, v23, v21 ; D2820015 04562F13 v_mad_f32 v21, v20, v25, v21 ; D2820015 04563314 v_mul_f32_e32 v22, v18, v21 ; 102C2B12 v_mad_f32 v22, v21, v18, v22 ; D2820016 045A2515 v_mad_f32 v9, -v9, v0, v22 ; D2820009 245A0109 v_mul_f32_e32 v22, v19, v21 ; 102C2B13 v_mad_f32 v22, v21, v19, v22 ; D2820016 045A2715 v_mad_f32 v10, -v10, v0, v22 ; D282000A 245A010A v_mul_f32_e32 v22, v20, v21 ; 102C2B14 v_mad_f32 v21, v21, v20, v22 ; D2820015 045A2915 v_mad_f32 v0, -v11, v0, v21 ; D2820000 2456010B image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[56:63], s[24:27] ; F0800700 00CE1505 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, s19, v21 ; 100A2A13 v_mul_f32_e32 v6, s19, v22 ; 100C2C13 v_mul_f32_e32 v11, s19, v23 ; 10162E13 v_mul_f32_e32 v21, 0x3e99999a, v27 ; 102A36FF 3E99999A v_mad_f32 v21, v28, v32, v21 ; D2820015 0456411C v_mad_f32 v21, v24, v33, v21 ; D2820015 04564318 v_sub_f32_e64 v22, 1.0, s18 ; D2080016 000024F2 v_mul_f32_e32 v23, v27, v22 ; 102E2D1B v_mul_f32_e32 v25, v28, v22 ; 10322D1C v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_mad_f32 v23, s18, v21, v23 ; D2820017 045E2A12 v_mad_f32 v24, s18, v21, v25 ; D2820018 04662A12 v_mad_f32 v21, s18, v21, v22 ; D2820015 045A2A12 v_mul_f32_e32 v22, s17, v23 ; 102C2E11 v_sub_f32_e64 v23, 1.0, s16 ; D2080017 000020F2 v_mul_f32_e32 v22, v22, v23 ; 102C2F16 v_mul_f32_e32 v23, s17, v24 ; 102E3011 v_sub_f32_e64 v24, 1.0, s15 ; D2080018 00001EF2 v_mul_f32_e32 v23, v23, v24 ; 102E3117 v_mul_f32_e32 v21, s17, v21 ; 102A2A11 v_sub_f32_e64 v24, 1.0, s14 ; D2080018 00001CF2 v_mul_f32_e32 v21, v21, v24 ; 102A3115 v_mad_f32 v22, v17, v22, s11 ; D2820016 002E2D11 v_mad_f32 v23, v23, v17, s12 ; D2820017 00322317 v_mad_f32 v17, v21, v17, s13 ; D2820011 00362315 v_max_f32_e32 v12, 0, v12 ; 20181880 v_add_f32_e32 v21, v31, v12 ; 062A191F v_cmp_gt_f32_e32 vcc, 0, v21 ; 7C082A80 v_mul_f32_e32 v5, s17, v5 ; 100A0A11 v_mul_f32_e32 v6, s17, v6 ; 100C0C11 v_mul_f32_e32 v11, s17, v11 ; 10161611 v_mad_f32 v5, v16, v5, s8 ; D2820005 00220B10 v_mad_f32 v6, v6, v16, s9 ; D2820006 00262106 v_mad_f32 v11, v11, v16, s10 ; D282000B 002A210B v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v4, v1, v4 ; 10080901 v_and_b32_e32 v12, v12, v26 ; 3618350C v_log_f32_e32 v12, v12 ; 7E184F0C v_mul_f32_e32 v16, v2, v18 ; 10202502 v_mad_f32 v16, v19, v4, v16 ; D2820010 04420913 v_mul_f32_e32 v2, v2, v9 ; 10041302 v_mul_legacy_f32_e32 v9, s3, v12 ; 0E121803 v_exp_f32_e32 v9, v9 ; 7E124B09 v_cndmask_b32_e64 v9, v9, 0, vcc ; D2000009 01A90109 v_mad_f32 v2, v10, v4, v2 ; D2820002 040A090A v_mul_f32_e32 v1, v1, v3 ; 10020701 v_mad_f32 v3, v20, v1, v16 ; D2820003 04420314 v_mad_f32 v0, v0, v1, v2 ; D2820000 040A0300 v_add_f32_e64 v1, 0, v3 clamp ; D2060801 00020680 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v2, v0, v26 ; 36043500 v_log_f32_e32 v2, v2 ; 7E044F02 v_add_f32_e32 v3, v31, v1 ; 0606031F v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_add_f32_e32 v0, v31, v0 ; 0600011F v_mul_legacy_f32_e32 v2, 0x41700000, v2 ; 0E0404FF 41700000 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v2, 0x3f6a46ad, v2 ; 100404FF 3F6A46AD v_mul_f32_e32 v3, v2, v5 ; 10060B02 v_mul_f32_e32 v4, v2, v6 ; 10080D02 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v3, 0, vcc ; D2000000 01A90103 v_cndmask_b32_e64 v3, v4, 0, vcc ; D2000003 01A90104 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_mad_f32 v0, v22, v1, v0 ; D2820000 04020316 v_mad_f32 v3, v23, v1, v3 ; D2820003 040E0317 v_mad_f32 v1, v17, v1, v2 ; D2820001 040A0311 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mul_f32_e32 v2, v3, v9 ; 10041303 v_mul_f32_e32 v1, v1, v9 ; 10021301 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mul_f32_e32 v2, v2, v14 ; 10041D02 v_mul_f32_e32 v1, v1, v15 ; 10021F01 v_mul_f32_e32 v0, s2, v0 ; 10000002 v_mul_f32_e32 v2, s1, v2 ; 10040401 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v0, v7 ; 10000F00 v_mul_f32_e32 v2, v2, v7 ; 10040F02 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_xor_b32_e32 v3, 0x80000000, v8 ; 3A0610FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v2, 0, v2, vcc ; D2000002 01AA0480 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 40 Code Size: 1572 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..7] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { -0.0000, 8.0000, 0.0398, 0.0000} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: TEX TEMP[3], IN[0], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: DP3 TEMP[1].w, IN[2], IN[2] 18: RSQ TEMP[0], |TEMP[1].wwww| 19: MIN TEMP[2].w, IMM[2].xxxx, TEMP[0] 20: MUL TEMP[3].xyz, TEMP[2].wwww, IN[2] 21: DP3 TEMP[2].w, TEMP[3], -CONST[9] 22: ADD TEMP[2].w, TEMP[2].wwww, -CONST[10].xxxx 23: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[10].yyyy 24: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 25: ADD TEMP[1].w, -TEMP[1].wwww, IMM[0].zzzz 26: MUL TEMP[3], TEMP[2].wwww, TEMP[1].wwww 27: MOV TEMP[5], TEMP[3] 28: KILL_IF TEMP[5] 29: UIF CONST[240].xxxx :0 30: RCP TEMP[3].x, IN[4].wwww 31: MUL TEMP[3].xy, TEMP[3].xxxx, IN[4] 32: MAD TEMP[3].xy, TEMP[3], CONST[1], CONST[1].wzzw 33: TEX TEMP[5], TEMP[3], SAMP[0], 2D 34: ELSE :36 35: MOV TEMP[5].xyz, IMM[0].zzzz 36: ENDIF 37: MOV TEMP[3].z, IMM[0].zzzz 38: ADD TEMP[3].xyz, TEMP[3].zzzz, -CONST[0] 39: TEX TEMP[6], IN[0], SAMP[2], 2D 40: MUL TEMP[3].xyz, TEMP[3], TEMP[6] 41: MAD TEMP[3].xyz, TEMP[3], CONST[4].wwww, CONST[4] 42: MAX TEMP[4].w, TEMP[1].wwww, IMM[0].wwww 43: ADD TEMP[1].w, TEMP[4].wwww, IMM[1].xxxx 44: POW TEMP[5].w, |TEMP[4].wwww|, CONST[8].wwww 45: CMP TEMP[1].w, TEMP[1].wwww, IMM[0].wwww, TEMP[5].wwww 46: MUL TEMP[6].xyz, CONST[6], CONST[6].wwww 47: TEX TEMP[7], IN[0], SAMP[3], 2D 48: MUL TEMP[6].xyz, TEMP[6], TEMP[7] 49: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 50: TEX TEMP[7], IN[0], SAMP[4], 2D 51: MUL TEMP[4].w, TEMP[7].xxxx, CONST[7].xxxx 52: MAX TEMP[5].w, TEMP[4].wwww, CONST[7].yyyy 53: MIN TEMP[4].w, CONST[7].xxxx, TEMP[5].wwww 54: DP3_SAT TEMP[4].x, TEMP[4], TEMP[2] 55: ADD TEMP[4].y, TEMP[4].xxxx, IMM[1].xxxx 56: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 57: ADD TEMP[1].y, TEMP[1].xxxx, IMM[1].xxxx 58: POW TEMP[2].x, |TEMP[1].xxxx|, TEMP[4].wwww 59: ADD TEMP[1].x, TEMP[4].wwww, IMM[1].yyyy 60: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 61: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 62: MUL TEMP[2].xyz, TEMP[3], TEMP[4].xxxx 63: CMP TEMP[2].xyz, TEMP[4].yyyy, IMM[0].wwww, TEMP[2] 64: MUL TEMP[3].xyz, TEMP[6], TEMP[1].xxxx 65: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[0].wwww, TEMP[3] 66: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 67: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 68: MUL TEMP[1].xyz, TEMP[5], TEMP[1] 69: MUL TEMP[1].xyz, TEMP[1], CONST[8] 70: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 71: CMP OUT[0].xyz, -TEMP[3].wwww, TEMP[1], IMM[0].wwww 72: MOV OUT[0].w, IMM[0].wwww 73: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %51 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0 %53 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %74 = bitcast <8 x i32> addrspace(2)* %73 to <32 x i8> addrspace(2)* %75 = load <32 x i8>, <32 x i8> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %77 = bitcast <4 x i32> addrspace(2)* %76 to <16 x i8> addrspace(2)* %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %90 = fmul float %87, %87 %91 = fmul float %88, %88 %92 = fadd float %91, %90 %93 = fmul float %89, %89 %94 = fadd float %92, %93 %95 = call float @llvm.AMDGPU.rsq.clamped.f32(float %94) %96 = call float @llvm.minnum.f32(float %95, float 0x47EFFFFFE0000000) %97 = fmul float %87, %96 %98 = fmul float %88, %96 %99 = fmul float %89, %96 %100 = fmul float %81, %81 %101 = fmul float %82, %82 %102 = fadd float %101, %100 %103 = fmul float %83, %83 %104 = fadd float %102, %103 %105 = call float @llvm.AMDGPU.rsq.clamped.f32(float %104) %106 = call float @llvm.minnum.f32(float %105, float 0x47EFFFFFE0000000) %107 = fmul float %81, %106 %108 = fmul float %82, %106 %109 = fmul float %83, %106 %110 = bitcast float %79 to i32 %111 = bitcast float %80 to i32 %112 = insertelement <2 x i32> undef, i32 %110, i32 0 %113 = insertelement <2 x i32> %112, i32 %111, i32 1 %114 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %57, <16 x i8> %60, i32 2) %115 = extractelement <4 x float> %114, i32 0 %116 = extractelement <4 x float> %114, i32 1 %117 = extractelement <4 x float> %114, i32 2 %118 = fmul float %115, 2.000000e+00 %119 = fadd float %118, -1.000000e+00 %120 = fmul float %116, 2.000000e+00 %121 = fadd float %120, -1.000000e+00 %122 = fmul float %117, 2.000000e+00 %123 = fadd float %122, -1.000000e+00 %124 = fmul float %119, %119 %125 = fmul float %121, %121 %126 = fadd float %125, %124 %127 = fmul float %123, %123 %128 = fadd float %126, %127 %129 = call float @llvm.AMDGPU.rsq.clamped.f32(float %128) %130 = call float @llvm.minnum.f32(float %129, float 0x47EFFFFFE0000000) %131 = fmul float %119, %130 %132 = fmul float %121, %130 %133 = fmul float %123, %130 %134 = fmul float %131, %97 %135 = fmul float %132, %98 %136 = fadd float %135, %134 %137 = fmul float %133, %99 %138 = fadd float %136, %137 %139 = fmul float %138, %131 %140 = fmul float %138, %132 %141 = fmul float %138, %133 %142 = fmul float %139, 2.000000e+00 %143 = fsub float %142, %97 %144 = fmul float %140, 2.000000e+00 %145 = fsub float %144, %98 %146 = fmul float %141, 2.000000e+00 %147 = fsub float %146, %99 %148 = fmul float %84, %84 %149 = fmul float %85, %85 %150 = fadd float %149, %148 %151 = fmul float %86, %86 %152 = fadd float %150, %151 %153 = call float @fabs(float %152) %154 = call float @llvm.AMDGPU.rsq.clamped.f32(float %153) %155 = call float @llvm.minnum.f32(float %154, float 0x47EFFFFFE0000000) %156 = fmul float %155, %84 %157 = fmul float %155, %85 %158 = fmul float %155, %86 %159 = fmul float %45, %156 %160 = fsub float -0.000000e+00, %159 %161 = fmul float %46, %157 %162 = fsub float %160, %161 %163 = fmul float %47, %158 %164 = fsub float %162, %163 %165 = fsub float %164, %48 %166 = fmul float %165, %49 %167 = call float @llvm.AMDIL.clamp.(float %166, float 0.000000e+00, float 1.000000e+00) %168 = fmul float %167, %167 %169 = fsub float 1.000000e+00, %152 %170 = fmul float %168, %169 %171 = fmul float %168, %169 %172 = fmul float %168, %169 %173 = fmul float %168, %169 %174 = fcmp olt float %170, 0.000000e+00 %175 = fcmp olt float %171, 0.000000e+00 %176 = fcmp olt float %172, 0.000000e+00 %177 = fcmp olt float %173, 0.000000e+00 %178 = or i1 %177, %176 %179 = or i1 %178, %175 %180 = or i1 %179, %174 %181 = select i1 %180, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %181) %182 = bitcast float %50 to i32 %183 = icmp eq i32 %182, 0 br i1 %183, label %ENDIF, label %IF IF: ; preds = %main_body %184 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %185 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %186 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %187 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %188 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %189 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %190 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %191 = fdiv float 1.000000e+00, %184 %192 = fmul float %191, %186 %193 = fmul float %191, %185 %194 = fmul float %192, %190 %195 = fadd float %194, %187 %196 = fmul float %193, %189 %197 = fadd float %196, %188 %198 = bitcast float %195 to i32 %199 = bitcast float %197 to i32 %200 = insertelement <2 x i32> undef, i32 %198, i32 0 %201 = insertelement <2 x i32> %200, i32 %199, i32 1 %202 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %201, <32 x i8> %52, <16 x i8> %54, i32 2) %203 = extractelement <4 x float> %202, i32 0 %204 = extractelement <4 x float> %202, i32 1 %205 = extractelement <4 x float> %202, i32 2 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp20.0 = phi float [ %203, %IF ], [ 1.000000e+00, %main_body ] %temp21.0 = phi float [ %204, %IF ], [ 1.000000e+00, %main_body ] %temp22.0 = phi float [ %205, %IF ], [ 1.000000e+00, %main_body ] %206 = fsub float 1.000000e+00, %24 %207 = fsub float 1.000000e+00, %25 %208 = fsub float 1.000000e+00, %26 %209 = bitcast float %79 to i32 %210 = bitcast float %80 to i32 %211 = insertelement <2 x i32> undef, i32 %209, i32 0 %212 = insertelement <2 x i32> %211, i32 %210, i32 1 %213 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %212, <32 x i8> %63, <16 x i8> %66, i32 2) %214 = extractelement <4 x float> %213, i32 0 %215 = extractelement <4 x float> %213, i32 1 %216 = extractelement <4 x float> %213, i32 2 %217 = fmul float %206, %214 %218 = fmul float %207, %215 %219 = fmul float %208, %216 %220 = fmul float %217, %30 %221 = fadd float %220, %27 %222 = fmul float %218, %30 %223 = fadd float %222, %28 %224 = fmul float %219, %30 %225 = fadd float %224, %29 %226 = call float @llvm.maxnum.f32(float %169, float 0.000000e+00) %227 = fadd float %226, 0xBEB0C6F7A0000000 %228 = call float @fabs(float %226) %229 = call float @llvm.pow.f32(float %228, float %44) %230 = call float @llvm.AMDGPU.cndlt(float %227, float 0.000000e+00, float %229) %231 = fmul float %35, %38 %232 = fmul float %36, %38 %233 = fmul float %37, %38 %234 = bitcast float %79 to i32 %235 = bitcast float %80 to i32 %236 = insertelement <2 x i32> undef, i32 %234, i32 0 %237 = insertelement <2 x i32> %236, i32 %235, i32 1 %238 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %237, <32 x i8> %69, <16 x i8> %72, i32 2) %239 = extractelement <4 x float> %238, i32 0 %240 = extractelement <4 x float> %238, i32 1 %241 = extractelement <4 x float> %238, i32 2 %242 = fmul float %231, %239 %243 = fmul float %232, %240 %244 = fmul float %233, %241 %245 = fmul float %242, %34 %246 = fadd float %245, %31 %247 = fmul float %243, %34 %248 = fadd float %247, %32 %249 = fmul float %244, %34 %250 = fadd float %249, %33 %251 = bitcast float %79 to i32 %252 = bitcast float %80 to i32 %253 = insertelement <2 x i32> undef, i32 %251, i32 0 %254 = insertelement <2 x i32> %253, i32 %252, i32 1 %255 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %254, <32 x i8> %75, <16 x i8> %78, i32 2) %256 = extractelement <4 x float> %255, i32 0 %257 = fmul float %256, %39 %258 = call float @llvm.maxnum.f32(float %257, float %40) %259 = call float @llvm.minnum.f32(float %39, float %258) %260 = fmul float %131, %107 %261 = fmul float %132, %108 %262 = fadd float %261, %260 %263 = fmul float %133, %109 %264 = fadd float %262, %263 %265 = call float @llvm.AMDIL.clamp.(float %264, float 0.000000e+00, float 1.000000e+00) %266 = fadd float %265, 0xBEB0C6F7A0000000 %267 = fmul float %143, %107 %268 = fmul float %145, %108 %269 = fadd float %268, %267 %270 = fmul float %147, %109 %271 = fadd float %269, %270 %272 = call float @llvm.AMDIL.clamp.(float %271, float 0.000000e+00, float 1.000000e+00) %273 = fadd float %272, 0xBEB0C6F7A0000000 %274 = call float @fabs(float %272) %275 = call float @llvm.pow.f32(float %274, float %259) %276 = fadd float %259, 8.000000e+00 %277 = fmul float %276, %275 %278 = fmul float %277, 0x3FA45F3060000000 %279 = fmul float %221, %265 %280 = fmul float %223, %265 %281 = fmul float %225, %265 %282 = call float @llvm.AMDGPU.cndlt(float %266, float 0.000000e+00, float %279) %283 = call float @llvm.AMDGPU.cndlt(float %266, float 0.000000e+00, float %280) %284 = call float @llvm.AMDGPU.cndlt(float %266, float 0.000000e+00, float %281) %285 = fmul float %246, %278 %286 = fmul float %248, %278 %287 = fmul float %250, %278 %288 = call float @llvm.AMDGPU.cndlt(float %273, float 0.000000e+00, float %285) %289 = call float @llvm.AMDGPU.cndlt(float %273, float 0.000000e+00, float %286) %290 = call float @llvm.AMDGPU.cndlt(float %273, float 0.000000e+00, float %287) %291 = fadd float %288, %282 %292 = fadd float %289, %283 %293 = fadd float %290, %284 %294 = fmul float %230, %291 %295 = fmul float %230, %292 %296 = fmul float %230, %293 %297 = fmul float %temp20.0, %294 %298 = fmul float %temp21.0, %295 %299 = fmul float %temp22.0, %296 %300 = fmul float %297, %41 %301 = fmul float %298, %42 %302 = fmul float %299, %43 %303 = fmul float %168, %300 %304 = fmul float %168, %301 %305 = fmul float %168, %302 %306 = fsub float -0.000000e+00, %173 %307 = call float @llvm.AMDGPU.cndlt(float %306, float %303, float 0.000000e+00) %308 = fsub float -0.000000e+00, %173 %309 = call float @llvm.AMDGPU.cndlt(float %308, float %304, float 0.000000e+00) %310 = fsub float -0.000000e+00, %173 %311 = call float @llvm.AMDGPU.cndlt(float %310, float %305, float 0.000000e+00) %312 = call i32 @llvm.SI.packf16(float %307, float %309) %313 = bitcast i32 %312 to float %314 = call i32 @llvm.SI.packf16(float %311, float 0.000000e+00) %315 = bitcast i32 %314 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %313, float %315, float %313, float %315) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v4, v0, 2, 1, [m0] ; C8100600 v_interp_p2_f32 v4, [v4], v1, 2, 1, [m0] ; C8110601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00 v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 v_interp_p1_f32 v11, v0, 2, 3, [m0] ; C82C0E00 v_interp_p2_f32 v11, [v11], v1, 2, 3, [m0] ; C82D0E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800700 00450E02 v_mul_f32_e32 v13, v7, v7 ; 101A0F07 v_mad_f32 v13, v9, v9, v13 ; D282000D 04361309 v_mad_f32 v17, v12, v12, v13 ; D2820011 0436190C s_buffer_load_dword s0, s[16:19], 0x24 ; C2001124 s_buffer_load_dword s1, s[16:19], 0x25 ; C2009125 s_buffer_load_dword s2, s[16:19], 0x26 ; C2011126 s_buffer_load_dword s3, s[16:19], 0x28 ; C2019128 s_buffer_load_dword s8, s[16:19], 0x29 ; C2041129 v_rsq_clamp_f32_e64 v18, |v17| ; D3580112 00000111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v21, 2.0, v14, -1.0 ; D2820015 03CE1CF4 v_mad_f32 v13, 2.0, v15, -1.0 ; D282000D 03CE1EF4 v_mad_f32 v14, 2.0, v16, -1.0 ; D282000E 03CE20F4 v_min_f32_e32 v15, 0x7f7fffff, v18 ; 1E1E24FF 7F7FFFFF v_mul_f32_e32 v7, v7, v15 ; 100E1F07 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s0, v7 ; 100E0E00 v_mul_f32_e32 v9, v9, v15 ; 10121F09 v_mad_f32 v7, -s1, v9, -v7 ; D2820007 A41E1201 v_mul_f32_e32 v9, v12, v15 ; 10121F0C v_mad_f32 v7, -s2, v9, v7 ; D2820007 241E1202 v_subrev_f32_e32 v7, s3, v7 ; 0A0E0E03 v_mul_f32_e32 v7, s8, v7 ; 100E0E08 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_sub_f32_e32 v12, 1.0, v17 ; 081822F2 v_mul_f32_e32 v9, v12, v7 ; 10120F0C v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v15, 0, -1.0, vcc ; D200000F 01A9E680 v_cndmask_b32_e64 v15, v15, -1.0, vcc ; D200000F 01A9E70F v_cndmask_b32_e64 v15, v15, -1.0, vcc ; D200000F 01A9E70F v_cndmask_b32_e64 v16, v15, -1.0, vcc ; D2000010 01A9E70F s_movk_i32 s0, 0xf00 ; B0000F00 s_buffer_load_dword s26, s[16:19], s0 ; C20D1000 v_mul_f32_e32 v15, v8, v8 ; 101E1108 v_mad_f32 v15, v10, v10, v15 ; D282000F 043E150A v_mad_f32 v15, v11, v11, v15 ; D282000F 043E170B v_rsq_clamp_f32_e32 v22, v15 ; 7E2C590F v_mul_f32_e32 v15, v5, v5 ; 101E0B05 v_mad_f32 v15, v6, v6, v15 ; D282000F 043E0D06 v_mad_f32 v15, v4, v4, v15 ; D282000F 043E0904 v_rsq_clamp_f32_e32 v23, v15 ; 7E2E590F s_buffer_load_dword s9, s[16:19], 0x23 ; C2049123 s_buffer_load_dword s23, s[16:19], 0x0 ; C20B9100 s_buffer_load_dword s24, s[16:19], 0x1 ; C20C1101 s_buffer_load_dword s25, s[16:19], 0x2 ; C20C9102 s_buffer_load_dword s13, s[16:19], 0x10 ; C2069110 s_buffer_load_dword s14, s[16:19], 0x11 ; C2071111 s_buffer_load_dword s15, s[16:19], 0x12 ; C2079112 s_buffer_load_dword s27, s[16:19], 0x13 ; C20D9113 s_buffer_load_dword s10, s[16:19], 0x14 ; C2051114 s_buffer_load_dword s11, s[16:19], 0x15 ; C2059115 s_buffer_load_dword s12, s[16:19], 0x16 ; C2061116 s_buffer_load_dword s28, s[16:19], 0x17 ; C20E1117 s_buffer_load_dword s22, s[16:19], 0x18 ; C20B1118 s_buffer_load_dword s21, s[16:19], 0x19 ; C20A9119 s_buffer_load_dword s20, s[16:19], 0x1a ; C20A111A s_buffer_load_dword s29, s[16:19], 0x1b ; C20E911B s_buffer_load_dword s3, s[16:19], 0x1c ; C201911C s_buffer_load_dword s8, s[16:19], 0x1d ; C204111D s_buffer_load_dword s0, s[16:19], 0x20 ; C2001120 s_buffer_load_dword s1, s[16:19], 0x21 ; C2009121 s_buffer_load_dword s2, s[16:19], 0x22 ; C2011122 v_mul_f32_e32 v15, v21, v21 ; 101E2B15 v_mad_f32 v15, v13, v13, v15 ; D282000F 043E1B0D v_mad_f32 v15, v14, v14, v15 ; D282000F 043E1D0E v_rsq_clamp_f32_e32 v24, v15 ; 7E30590F v_mov_b32_e32 v15, 1.0 ; 7E1E02F2 v_cmpx_le_f32_e32 vcc, 0, v16 ; 7C262080 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v16, 1.0 ; 7E2002F2 v_mov_b32_e32 v17, 1.0 ; 7E2202F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[30:31], 0, s26 ; D10A001E 00003480 v_mov_b32_e32 v19, s27 ; 7E26021B v_mov_b32_e32 v18, s28 ; 7E24021C v_mov_b32_e32 v20, s29 ; 7E28021D s_and_saveexec_b64 s[26:27], s[30:31] ; BE9A241E s_xor_b64 s[26:27], exec, s[26:27] ; 899A1A7E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 v_interp_p1_f32 v15, v0, 3, 4, [m0] ; C83C1300 s_buffer_load_dword s40, s[16:19], 0x6 ; C2141106 s_buffer_load_dword s41, s[16:19], 0x7 ; C2149107 s_buffer_load_dword s42, s[16:19], 0x4 ; C2151104 s_buffer_load_dword s43, s[16:19], 0x5 ; C2159105 v_interp_p2_f32 v15, [v15], v1, 3, 4, [m0] ; C83D1301 v_interp_p1_f32 v16, v0, 1, 4, [m0] ; C8401100 v_rcp_f32_e32 v15, v15 ; 7E1E550F v_interp_p2_f32 v16, [v16], v1, 1, 4, [m0] ; C8411101 v_interp_p1_f32 v0, v0, 0, 4, [m0] ; C8001000 v_interp_p2_f32 v0, [v0], v1, 0, 4, [m0] ; C8011001 v_mul_f32_e32 v0, v0, v15 ; 10001F00 v_mul_f32_e32 v1, v16, v15 ; 10021F10 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v15, s41 ; 7E1E0229 v_mad_f32 v15, s42, v0, v15 ; D282000F 043E002A v_mov_b32_e32 v0, s40 ; 7E000228 v_mad_f32 v16, s43, v1, v0 ; D2820010 0402022B image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[28:35], s[36:39] ; F0800700 01270F0F s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[26:27] ; 88FE1A7E v_min_f32_e32 v0, 0x7f7fffff, v22 ; 1E002CFF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v23 ; 1E022EFF 7F7FFFFF v_min_f32_e32 v22, 0x7f7fffff, v24 ; 1E2C30FF 7F7FFFFF v_mul_f32_e32 v21, v22, v21 ; 102A2B16 s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 s_load_dwordx8 s[48:55], s[6:7], 0x10 ; C0D80710 v_sub_f32_e64 v23, 1.0, s23 ; D2080017 00002EF2 v_sub_f32_e64 v24, 1.0, s24 ; D2080018 000030F2 v_sub_f32_e64 v25, 1.0, s25 ; D2080019 000032F2 s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 s_load_dwordx8 s[24:31], s[6:7], 0x20 ; C0CC0720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[44:47] ; F0800700 016C1A02 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v23, v26, v23 ; 102E2F1A v_mul_f32_e32 v24, v27, v24 ; 1030311B v_mul_f32_e32 v25, v28, v25 ; 1032331C v_mul_f32_e32 v26, s22, v20 ; 10342816 v_mul_f32_e32 v27, s21, v20 ; 10362815 v_mul_f32_e32 v20, s20, v20 ; 10282814 image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[32:35] ; F0800700 01091C02 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v26, v28, v26 ; 1034351C v_mul_f32_e32 v27, v29, v27 ; 1036371D v_mul_f32_e32 v20, v30, v20 ; 1028291E v_mul_f32_e32 v13, v22, v13 ; 101A1B16 v_mul_f32_e32 v14, v22, v14 ; 101C1D16 v_mul_f32_e32 v22, v0, v8 ; 102C1100 v_mul_f32_e32 v22, v22, v21 ; 102C2B16 v_mul_f32_e32 v28, v0, v10 ; 10381500 v_mad_f32 v22, v13, v28, v22 ; D2820016 045A390D v_mul_f32_e32 v28, v0, v11 ; 10381700 v_mad_f32 v22, v14, v28, v22 ; D2820016 045A390E v_mul_f32_e32 v28, v21, v22 ; 10382D15 v_mad_f32 v28, v22, v21, v28 ; D282001C 04722B16 v_mad_f32 v8, -v8, v0, v28 ; D2820008 24720108 v_mul_f32_e32 v28, v13, v22 ; 10382D0D v_mad_f32 v28, v22, v13, v28 ; D282001C 04721B16 v_mad_f32 v10, -v10, v0, v28 ; D282000A 2472010A v_mul_f32_e32 v28, v14, v22 ; 10382D0E v_mad_f32 v22, v22, v14, v28 ; D2820016 04721D16 v_mad_f32 v0, -v11, v0, v22 ; D2820000 245A010B v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mul_f32_e32 v6, v1, v6 ; 100C0D01 v_mul_f32_e32 v11, v5, v21 ; 10162B05 v_mad_f32 v11, v13, v6, v11 ; D282000B 042E0D0D v_mul_f32_e32 v5, v5, v8 ; 100A1105 v_mad_f32 v5, v10, v6, v5 ; D2820005 04160D0A v_mad_f32 v6, v19, v23, s13 ; D2820006 00362F13 v_mad_f32 v8, v24, v19, s14 ; D2820008 003A2718 v_mad_f32 v10, v25, v19, s15 ; D282000A 003E2719 v_max_f32_e32 v12, 0, v12 ; 20181880 v_mov_b32_e32 v13, 0x7fffffff ; 7E1A02FF 7FFFFFFF v_and_b32_e32 v19, v12, v13 ; 36261B0C v_log_f32_e32 v19, v19 ; 7E264F13 v_mov_b32_e32 v21, 0xb58637bd ; 7E2A02FF B58637BD v_add_f32_e32 v12, v21, v12 ; 06181915 v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[16:19] ; F0800100 00860202 v_mul_legacy_f32_e32 v3, s9, v19 ; 0E062609 v_exp_f32_e32 v3, v3 ; 7E064B03 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_mad_f32 v12, v18, v26, s10 ; D282000C 002A3512 v_mad_f32 v19, v27, v18, s11 ; D2820013 002E251B v_mad_f32 v18, v20, v18, s12 ; D2820012 00322514 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mad_f32 v4, v14, v1, v11 ; D2820004 042E030E v_mad_f32 v0, v0, v1, v5 ; D2820000 04160300 v_add_f32_e64 v1, 0, v4 clamp ; D2060801 00020880 v_mul_f32_e32 v4, v1, v6 ; 10080D01 v_mul_f32_e32 v5, v1, v8 ; 100A1101 v_mul_f32_e32 v6, v1, v10 ; 100C1501 v_add_f32_e32 v1, v21, v1 ; 06020315 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v4, 0, vcc ; D2000001 01A90104 v_cndmask_b32_e64 v4, v5, 0, vcc ; D2000004 01A90105 v_cndmask_b32_e64 v5, v6, 0, vcc ; D2000005 01A90106 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, s3, v2 ; 10040403 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v6, v0, v13 ; 360C1B00 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_max_f32_e32 v2, s8, v2 ; 20040408 v_min_f32_e32 v2, s3, v2 ; 1E040403 v_add_f32_e32 v0, v21, v0 ; 06000115 v_mul_legacy_f32_e32 v6, v2, v6 ; 0E0C0D02 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_add_f32_e32 v2, 0x41000000, v2 ; 060404FF 41000000 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v2, 0x3d22f983, v2 ; 100404FF 3D22F983 v_mul_f32_e32 v6, v2, v12 ; 100C1902 v_mul_f32_e32 v8, v2, v19 ; 10102702 v_mul_f32_e32 v2, v2, v18 ; 10042502 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v6, 0, vcc ; D2000000 01A90106 v_cndmask_b32_e64 v6, v8, 0, vcc ; D2000006 01A90108 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_add_f32_e32 v0, v1, v0 ; 06000101 v_add_f32_e32 v1, v4, v6 ; 06020D04 v_add_f32_e32 v2, v5, v2 ; 06040505 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mul_f32_e32 v1, v1, v3 ; 10020701 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_mul_f32_e32 v0, v0, v15 ; 10001F00 v_mul_f32_e32 v1, v1, v16 ; 10022101 v_mul_f32_e32 v2, v2, v17 ; 10042302 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_mul_f32_e32 v1, s1, v1 ; 10020201 v_mul_f32_e32 v2, s2, v2 ; 10040402 v_mul_f32_e32 v0, v0, v7 ; 10000F00 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mul_f32_e32 v2, v2, v7 ; 10040F02 v_xor_b32_e32 v3, 0x80000000, v9 ; 3A0612FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cndmask_b32_e64 v1, 0, v2, vcc ; D2000001 01AA0480 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 32 Code Size: 1316 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..7] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { -0.0000, 8.0000, 0.0398, 0.0000} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: TEX TEMP[3], IN[0], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[1].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: DP3 TEMP[1].w, IN[2], IN[2] 18: RSQ TEMP[0], |TEMP[1].wwww| 19: MIN TEMP[2].w, IMM[2].xxxx, TEMP[0] 20: MUL TEMP[3].xyz, TEMP[2].wwww, IN[2] 21: DP3 TEMP[2].w, TEMP[3], -CONST[9] 22: ADD TEMP[2].w, TEMP[2].wwww, -CONST[10].xxxx 23: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[10].yyyy 24: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 25: ADD TEMP[1].w, -TEMP[1].wwww, IMM[0].zzzz 26: MUL TEMP[3], TEMP[2].wwww, TEMP[1].wwww 27: MOV TEMP[5], TEMP[3] 28: KILL_IF TEMP[5] 29: UIF CONST[240].xxxx :0 30: RCP TEMP[3].x, IN[4].wwww 31: MUL TEMP[3].xy, TEMP[3].xxxx, IN[4] 32: MAD TEMP[3].xy, TEMP[3], CONST[1], CONST[1].wzzw 33: TEX TEMP[5], TEMP[3], SAMP[0], 2D 34: ELSE :36 35: MOV TEMP[5].xyz, IMM[0].zzzz 36: ENDIF 37: MOV TEMP[3].z, IMM[0].zzzz 38: ADD TEMP[3].xyz, TEMP[3].zzzz, -CONST[0] 39: TEX TEMP[6], IN[0], SAMP[2], 2D 40: MUL TEMP[3].xyz, TEMP[3], TEMP[6] 41: MAD TEMP[3].xyz, TEMP[3], CONST[4].wwww, CONST[4] 42: MAX TEMP[4].w, TEMP[1].wwww, IMM[0].wwww 43: ADD TEMP[1].w, TEMP[4].wwww, IMM[1].xxxx 44: POW TEMP[5].w, |TEMP[4].wwww|, CONST[8].wwww 45: CMP TEMP[1].w, TEMP[1].wwww, IMM[0].wwww, TEMP[5].wwww 46: MUL TEMP[6].xyz, CONST[6], CONST[6].wwww 47: TEX TEMP[7], IN[0], SAMP[3], 2D 48: MUL TEMP[6].xyz, TEMP[6], TEMP[7] 49: MAD TEMP[6].xyz, TEMP[6], CONST[5].wwww, CONST[5] 50: DP3_SAT TEMP[4].x, TEMP[4], TEMP[2] 51: ADD TEMP[4].y, TEMP[4].xxxx, IMM[1].xxxx 52: DP3_SAT TEMP[1].x, TEMP[1], TEMP[2] 53: ADD TEMP[1].y, TEMP[1].xxxx, IMM[1].xxxx 54: POW TEMP[2].x, |TEMP[1].xxxx|, CONST[7].xxxx 55: MOV TEMP[2].y, IMM[1].yyyy 56: ADD TEMP[1].x, TEMP[2].yyyy, CONST[7].xxxx 57: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 58: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 59: MUL TEMP[2].xyz, TEMP[3], TEMP[4].xxxx 60: CMP TEMP[2].xyz, TEMP[4].yyyy, IMM[0].wwww, TEMP[2] 61: MUL TEMP[3].xyz, TEMP[6], TEMP[1].xxxx 62: CMP TEMP[1].xyz, TEMP[1].yyyy, IMM[0].wwww, TEMP[3] 63: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 64: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 65: MUL TEMP[1].xyz, TEMP[5], TEMP[1] 66: MUL TEMP[1].xyz, TEMP[1], CONST[8] 67: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 68: CMP OUT[0].xyz, -TEMP[3].wwww, TEMP[1], IMM[0].wwww 69: MOV OUT[0].w, IMM[0].wwww 70: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %50 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %55 = bitcast <8 x i32> addrspace(2)* %54 to <32 x i8> addrspace(2)* %56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %58 = bitcast <4 x i32> addrspace(2)* %57 to <16 x i8> addrspace(2)* %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %61 = bitcast <8 x i32> addrspace(2)* %60 to <32 x i8> addrspace(2)* %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %64 = bitcast <4 x i32> addrspace(2)* %63 to <16 x i8> addrspace(2)* %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 %66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %67 = bitcast <8 x i32> addrspace(2)* %66 to <32 x i8> addrspace(2)* %68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %70 = bitcast <4 x i32> addrspace(2)* %69 to <16 x i8> addrspace(2)* %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %83 = fmul float %80, %80 %84 = fmul float %81, %81 %85 = fadd float %84, %83 %86 = fmul float %82, %82 %87 = fadd float %85, %86 %88 = call float @llvm.AMDGPU.rsq.clamped.f32(float %87) %89 = call float @llvm.minnum.f32(float %88, float 0x47EFFFFFE0000000) %90 = fmul float %80, %89 %91 = fmul float %81, %89 %92 = fmul float %82, %89 %93 = fmul float %74, %74 %94 = fmul float %75, %75 %95 = fadd float %94, %93 %96 = fmul float %76, %76 %97 = fadd float %95, %96 %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97) %99 = call float @llvm.minnum.f32(float %98, float 0x47EFFFFFE0000000) %100 = fmul float %74, %99 %101 = fmul float %75, %99 %102 = fmul float %76, %99 %103 = bitcast float %72 to i32 %104 = bitcast float %73 to i32 %105 = insertelement <2 x i32> undef, i32 %103, i32 0 %106 = insertelement <2 x i32> %105, i32 %104, i32 1 %107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %106, <32 x i8> %56, <16 x i8> %59, i32 2) %108 = extractelement <4 x float> %107, i32 0 %109 = extractelement <4 x float> %107, i32 1 %110 = extractelement <4 x float> %107, i32 2 %111 = fmul float %108, 2.000000e+00 %112 = fadd float %111, -1.000000e+00 %113 = fmul float %109, 2.000000e+00 %114 = fadd float %113, -1.000000e+00 %115 = fmul float %110, 2.000000e+00 %116 = fadd float %115, -1.000000e+00 %117 = fmul float %112, %112 %118 = fmul float %114, %114 %119 = fadd float %118, %117 %120 = fmul float %116, %116 %121 = fadd float %119, %120 %122 = call float @llvm.AMDGPU.rsq.clamped.f32(float %121) %123 = call float @llvm.minnum.f32(float %122, float 0x47EFFFFFE0000000) %124 = fmul float %112, %123 %125 = fmul float %114, %123 %126 = fmul float %116, %123 %127 = fmul float %124, %90 %128 = fmul float %125, %91 %129 = fadd float %128, %127 %130 = fmul float %126, %92 %131 = fadd float %129, %130 %132 = fmul float %131, %124 %133 = fmul float %131, %125 %134 = fmul float %131, %126 %135 = fmul float %132, 2.000000e+00 %136 = fsub float %135, %90 %137 = fmul float %133, 2.000000e+00 %138 = fsub float %137, %91 %139 = fmul float %134, 2.000000e+00 %140 = fsub float %139, %92 %141 = fmul float %77, %77 %142 = fmul float %78, %78 %143 = fadd float %142, %141 %144 = fmul float %79, %79 %145 = fadd float %143, %144 %146 = call float @fabs(float %145) %147 = call float @llvm.AMDGPU.rsq.clamped.f32(float %146) %148 = call float @llvm.minnum.f32(float %147, float 0x47EFFFFFE0000000) %149 = fmul float %148, %77 %150 = fmul float %148, %78 %151 = fmul float %148, %79 %152 = fmul float %44, %149 %153 = fsub float -0.000000e+00, %152 %154 = fmul float %45, %150 %155 = fsub float %153, %154 %156 = fmul float %46, %151 %157 = fsub float %155, %156 %158 = fsub float %157, %47 %159 = fmul float %158, %48 %160 = call float @llvm.AMDIL.clamp.(float %159, float 0.000000e+00, float 1.000000e+00) %161 = fmul float %160, %160 %162 = fsub float 1.000000e+00, %145 %163 = fmul float %161, %162 %164 = fmul float %161, %162 %165 = fmul float %161, %162 %166 = fmul float %161, %162 %167 = fcmp olt float %163, 0.000000e+00 %168 = fcmp olt float %164, 0.000000e+00 %169 = fcmp olt float %165, 0.000000e+00 %170 = fcmp olt float %166, 0.000000e+00 %171 = or i1 %170, %169 %172 = or i1 %171, %168 %173 = or i1 %172, %167 %174 = select i1 %173, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %174) %175 = bitcast float %49 to i32 %176 = icmp eq i32 %175, 0 br i1 %176, label %ENDIF, label %IF IF: ; preds = %main_body %177 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %178 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %179 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %180 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %181 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %182 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %183 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %184 = fdiv float 1.000000e+00, %177 %185 = fmul float %184, %179 %186 = fmul float %184, %178 %187 = fmul float %185, %183 %188 = fadd float %187, %180 %189 = fmul float %186, %182 %190 = fadd float %189, %181 %191 = bitcast float %188 to i32 %192 = bitcast float %190 to i32 %193 = insertelement <2 x i32> undef, i32 %191, i32 0 %194 = insertelement <2 x i32> %193, i32 %192, i32 1 %195 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %194, <32 x i8> %51, <16 x i8> %53, i32 2) %196 = extractelement <4 x float> %195, i32 0 %197 = extractelement <4 x float> %195, i32 1 %198 = extractelement <4 x float> %195, i32 2 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp20.0 = phi float [ %196, %IF ], [ 1.000000e+00, %main_body ] %temp21.0 = phi float [ %197, %IF ], [ 1.000000e+00, %main_body ] %temp22.0 = phi float [ %198, %IF ], [ 1.000000e+00, %main_body ] %199 = fsub float 1.000000e+00, %24 %200 = fsub float 1.000000e+00, %25 %201 = fsub float 1.000000e+00, %26 %202 = bitcast float %72 to i32 %203 = bitcast float %73 to i32 %204 = insertelement <2 x i32> undef, i32 %202, i32 0 %205 = insertelement <2 x i32> %204, i32 %203, i32 1 %206 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %205, <32 x i8> %62, <16 x i8> %65, i32 2) %207 = extractelement <4 x float> %206, i32 0 %208 = extractelement <4 x float> %206, i32 1 %209 = extractelement <4 x float> %206, i32 2 %210 = fmul float %199, %207 %211 = fmul float %200, %208 %212 = fmul float %201, %209 %213 = fmul float %210, %30 %214 = fadd float %213, %27 %215 = fmul float %211, %30 %216 = fadd float %215, %28 %217 = fmul float %212, %30 %218 = fadd float %217, %29 %219 = call float @llvm.maxnum.f32(float %162, float 0.000000e+00) %220 = fadd float %219, 0xBEB0C6F7A0000000 %221 = call float @fabs(float %219) %222 = call float @llvm.pow.f32(float %221, float %43) %223 = call float @llvm.AMDGPU.cndlt(float %220, float 0.000000e+00, float %222) %224 = fmul float %35, %38 %225 = fmul float %36, %38 %226 = fmul float %37, %38 %227 = bitcast float %72 to i32 %228 = bitcast float %73 to i32 %229 = insertelement <2 x i32> undef, i32 %227, i32 0 %230 = insertelement <2 x i32> %229, i32 %228, i32 1 %231 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %230, <32 x i8> %68, <16 x i8> %71, i32 2) %232 = extractelement <4 x float> %231, i32 0 %233 = extractelement <4 x float> %231, i32 1 %234 = extractelement <4 x float> %231, i32 2 %235 = fmul float %224, %232 %236 = fmul float %225, %233 %237 = fmul float %226, %234 %238 = fmul float %235, %34 %239 = fadd float %238, %31 %240 = fmul float %236, %34 %241 = fadd float %240, %32 %242 = fmul float %237, %34 %243 = fadd float %242, %33 %244 = fmul float %124, %100 %245 = fmul float %125, %101 %246 = fadd float %245, %244 %247 = fmul float %126, %102 %248 = fadd float %246, %247 %249 = call float @llvm.AMDIL.clamp.(float %248, float 0.000000e+00, float 1.000000e+00) %250 = fadd float %249, 0xBEB0C6F7A0000000 %251 = fmul float %136, %100 %252 = fmul float %138, %101 %253 = fadd float %252, %251 %254 = fmul float %140, %102 %255 = fadd float %253, %254 %256 = call float @llvm.AMDIL.clamp.(float %255, float 0.000000e+00, float 1.000000e+00) %257 = fadd float %256, 0xBEB0C6F7A0000000 %258 = call float @fabs(float %256) %259 = call float @llvm.pow.f32(float %258, float %39) %260 = fadd float %39, 8.000000e+00 %261 = fmul float %260, %259 %262 = fmul float %261, 0x3FA45F3060000000 %263 = fmul float %214, %249 %264 = fmul float %216, %249 %265 = fmul float %218, %249 %266 = call float @llvm.AMDGPU.cndlt(float %250, float 0.000000e+00, float %263) %267 = call float @llvm.AMDGPU.cndlt(float %250, float 0.000000e+00, float %264) %268 = call float @llvm.AMDGPU.cndlt(float %250, float 0.000000e+00, float %265) %269 = fmul float %239, %262 %270 = fmul float %241, %262 %271 = fmul float %243, %262 %272 = call float @llvm.AMDGPU.cndlt(float %257, float 0.000000e+00, float %269) %273 = call float @llvm.AMDGPU.cndlt(float %257, float 0.000000e+00, float %270) %274 = call float @llvm.AMDGPU.cndlt(float %257, float 0.000000e+00, float %271) %275 = fadd float %272, %266 %276 = fadd float %273, %267 %277 = fadd float %274, %268 %278 = fmul float %223, %275 %279 = fmul float %223, %276 %280 = fmul float %223, %277 %281 = fmul float %temp20.0, %278 %282 = fmul float %temp21.0, %279 %283 = fmul float %temp22.0, %280 %284 = fmul float %281, %40 %285 = fmul float %282, %41 %286 = fmul float %283, %42 %287 = fmul float %161, %284 %288 = fmul float %161, %285 %289 = fmul float %161, %286 %290 = fsub float -0.000000e+00, %166 %291 = call float @llvm.AMDGPU.cndlt(float %290, float %287, float 0.000000e+00) %292 = fsub float -0.000000e+00, %166 %293 = call float @llvm.AMDGPU.cndlt(float %292, float %288, float 0.000000e+00) %294 = fsub float -0.000000e+00, %166 %295 = call float @llvm.AMDGPU.cndlt(float %294, float %289, float 0.000000e+00) %296 = call i32 @llvm.SI.packf16(float %291, float %293) %297 = bitcast i32 %296 to float %298 = call i32 @llvm.SI.packf16(float %295, float 0.000000e+00) %299 = bitcast i32 %298 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %297, float %299, float %297, float %299) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_interp_p1_f32 v11, v0, 1, 1, [m0] ; C82C0500 v_interp_p2_f32 v11, [v11], v1, 1, 1, [m0] ; C82D0501 v_interp_p1_f32 v12, v0, 2, 1, [m0] ; C8300600 v_interp_p2_f32 v12, [v12], v1, 2, 1, [m0] ; C8310601 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 v_interp_p1_f32 v4, v0, 0, 3, [m0] ; C8100C00 s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 v_interp_p2_f32 v4, [v4], v1, 0, 3, [m0] ; C8110C01 v_interp_p1_f32 v7, v0, 1, 3, [m0] ; C81C0D00 v_interp_p2_f32 v7, [v7], v1, 1, 3, [m0] ; C81D0D01 v_interp_p1_f32 v8, v0, 2, 3, [m0] ; C8200E00 v_interp_p2_f32 v8, [v8], v1, 2, 3, [m0] ; C8210E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800700 00450D02 v_mul_f32_e32 v16, v5, v5 ; 10200B05 v_mad_f32 v16, v6, v6, v16 ; D2820010 04420D06 v_mad_f32 v16, v10, v10, v16 ; D2820010 0442150A s_buffer_load_dword s0, s[16:19], 0x24 ; C2001124 s_buffer_load_dword s1, s[16:19], 0x25 ; C2009125 s_buffer_load_dword s2, s[16:19], 0x26 ; C2011126 s_buffer_load_dword s3, s[16:19], 0x28 ; C2019128 s_buffer_load_dword s8, s[16:19], 0x29 ; C2041129 v_rsq_clamp_f32_e64 v17, |v16| ; D3580111 00000110 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, 2.0, v13, -1.0 ; D2820013 03CE1AF4 v_mad_f32 v20, 2.0, v14, -1.0 ; D2820014 03CE1CF4 v_mad_f32 v21, 2.0, v15, -1.0 ; D2820015 03CE1EF4 v_min_f32_e32 v13, 0x7f7fffff, v17 ; 1E1A22FF 7F7FFFFF v_mul_f32_e32 v5, v5, v13 ; 100A1B05 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s0, v5 ; 100A0A00 v_mul_f32_e32 v6, v6, v13 ; 100C1B06 v_mad_f32 v5, -s1, v6, -v5 ; D2820005 A4160C01 v_mul_f32_e32 v6, v10, v13 ; 100C1B0A v_mad_f32 v5, -s2, v6, v5 ; D2820005 24160C02 v_subrev_f32_e32 v5, s3, v5 ; 0A0A0A03 v_mul_f32_e32 v5, s8, v5 ; 100A0A08 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_sub_f32_e32 v10, 1.0, v16 ; 081420F2 v_mul_f32_e32 v6, v10, v5 ; 100C0B0A v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v13, 0, -1.0, vcc ; D200000D 01A9E680 v_cndmask_b32_e64 v13, v13, -1.0, vcc ; D200000D 01A9E70D v_cndmask_b32_e64 v13, v13, -1.0, vcc ; D200000D 01A9E70D v_cndmask_b32_e64 v14, v13, -1.0, vcc ; D200000E 01A9E70D s_movk_i32 s0, 0xf00 ; B0000F00 s_buffer_load_dword s25, s[16:19], s0 ; C20C9000 v_mul_f32_e32 v13, v4, v4 ; 101A0904 v_mad_f32 v13, v7, v7, v13 ; D282000D 04360F07 v_mad_f32 v13, v8, v8, v13 ; D282000D 04361108 v_rsq_clamp_f32_e32 v22, v13 ; 7E2C590D v_mul_f32_e32 v13, v9, v9 ; 101A1309 v_mad_f32 v13, v11, v11, v13 ; D282000D 0436170B v_mad_f32 v13, v12, v12, v13 ; D282000D 0436190C v_rsq_clamp_f32_e32 v23, v13 ; 7E2E590D s_buffer_load_dword s24, s[16:19], 0x0 ; C20C1100 s_buffer_load_dword s23, s[16:19], 0x1 ; C20B9101 s_buffer_load_dword s22, s[16:19], 0x2 ; C20B1102 s_buffer_load_dword s11, s[16:19], 0x10 ; C2059110 s_buffer_load_dword s12, s[16:19], 0x11 ; C2061111 s_buffer_load_dword s13, s[16:19], 0x12 ; C2069112 s_buffer_load_dword s26, s[16:19], 0x13 ; C20D1113 s_buffer_load_dword s9, s[16:19], 0x14 ; C2049114 s_buffer_load_dword s10, s[16:19], 0x15 ; C2051115 s_buffer_load_dword s8, s[16:19], 0x16 ; C2041116 s_buffer_load_dword s27, s[16:19], 0x17 ; C20D9117 s_buffer_load_dword s21, s[16:19], 0x18 ; C20A9118 s_buffer_load_dword s20, s[16:19], 0x19 ; C20A1119 s_buffer_load_dword s15, s[16:19], 0x1a ; C207911A s_buffer_load_dword s28, s[16:19], 0x1b ; C20E111B s_buffer_load_dword s3, s[16:19], 0x1c ; C201911C s_buffer_load_dword s2, s[16:19], 0x20 ; C2011120 s_buffer_load_dword s1, s[16:19], 0x21 ; C2009121 s_buffer_load_dword s0, s[16:19], 0x22 ; C2001122 s_buffer_load_dword s14, s[16:19], 0x23 ; C2071123 v_mul_f32_e32 v13, v19, v19 ; 101A2713 v_mad_f32 v13, v20, v20, v13 ; D282000D 04362914 v_mad_f32 v13, v21, v21, v13 ; D282000D 04362B15 v_rsq_clamp_f32_e32 v24, v13 ; 7E30590D v_mov_b32_e32 v13, 1.0 ; 7E1A02F2 v_cmpx_le_f32_e32 vcc, 0, v14 ; 7C261C80 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v14, 1.0 ; 7E1C02F2 v_mov_b32_e32 v15, 1.0 ; 7E1E02F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[30:31], 0, s25 ; D10A001E 00003280 v_mov_b32_e32 v17, s26 ; 7E22021A v_mov_b32_e32 v16, s27 ; 7E20021B v_mov_b32_e32 v18, s28 ; 7E24021C s_and_saveexec_b64 s[26:27], s[30:31] ; BE9A241E s_xor_b64 s[26:27], exec, s[26:27] ; 899A1A7E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 v_interp_p1_f32 v13, v0, 3, 4, [m0] ; C8341300 s_buffer_load_dword s25, s[16:19], 0x6 ; C20C9106 s_buffer_load_dword s40, s[16:19], 0x7 ; C2141107 s_buffer_load_dword s41, s[16:19], 0x4 ; C2149104 s_buffer_load_dword s42, s[16:19], 0x5 ; C2151105 v_interp_p2_f32 v13, [v13], v1, 3, 4, [m0] ; C8351301 v_interp_p1_f32 v14, v0, 1, 4, [m0] ; C8381100 v_rcp_f32_e32 v13, v13 ; 7E1A550D v_interp_p2_f32 v14, [v14], v1, 1, 4, [m0] ; C8391101 v_interp_p1_f32 v0, v0, 0, 4, [m0] ; C8001000 v_interp_p2_f32 v0, [v0], v1, 0, 4, [m0] ; C8011001 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mul_f32_e32 v1, v14, v13 ; 10021B0E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s40 ; 7E1A0228 v_mad_f32 v13, s41, v0, v13 ; D282000D 04360029 v_mov_b32_e32 v0, s25 ; 7E000219 v_mad_f32 v14, s42, v1, v0 ; D282000E 0402022A image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[28:35], s[36:39] ; F0800700 01270D0D s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[26:27] ; 88FE1A7E v_min_f32_e32 v0, 0x7f7fffff, v22 ; 1E002CFF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v23 ; 1E022EFF 7F7FFFFF v_mul_f32_e32 v9, v1, v9 ; 10121301 v_mul_f32_e32 v11, v1, v11 ; 10161701 v_mul_f32_e32 v1, v1, v12 ; 10021901 v_min_f32_e32 v12, 0x7f7fffff, v24 ; 1E1830FF 7F7FFFFF v_mul_f32_e32 v19, v12, v19 ; 1026270C v_mul_f32_e32 v20, v12, v20 ; 1028290C v_mul_f32_e32 v12, v12, v21 ; 10182B0C v_sub_f32_e64 v21, 1.0, s24 ; D2080015 000030F2 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[24:31], s[6:7], 0x18 ; C0CC0718 v_sub_f32_e64 v22, 1.0, s23 ; D2080016 00002EF2 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[16:19] ; F0800700 00891702 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v23, v21 ; 102A2B17 v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_sub_f32_e64 v23, 1.0, s22 ; D2080017 00002CF2 v_mul_f32_e32 v23, v25, v23 ; 102E2F19 v_mul_f32_e32 v24, s21, v18 ; 10302415 v_mul_f32_e32 v25, s20, v18 ; 10322414 v_mul_f32_e32 v18, s15, v18 ; 1024240F image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[32:35] ; F0800700 01061A02 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v26, v24 ; 1004311A v_mul_f32_e32 v3, v27, v25 ; 1006331B v_mul_f32_e32 v18, v28, v18 ; 1024251C v_mul_f32_e32 v24, v0, v4 ; 10300900 v_mul_f32_e32 v24, v24, v19 ; 10302718 v_mul_f32_e32 v25, v0, v7 ; 10320F00 v_mad_f32 v24, v20, v25, v24 ; D2820018 04623314 v_mul_f32_e32 v25, v0, v8 ; 10321100 v_mad_f32 v24, v12, v25, v24 ; D2820018 0462330C v_mul_f32_e32 v25, v19, v24 ; 10323113 v_mad_f32 v25, v24, v19, v25 ; D2820019 04662718 v_mad_f32 v4, -v4, v0, v25 ; D2820004 24660104 v_mul_f32_e32 v25, v20, v24 ; 10323114 v_mad_f32 v25, v24, v20, v25 ; D2820019 04662918 v_mad_f32 v7, -v7, v0, v25 ; D2820007 24660107 v_mul_f32_e32 v25, v12, v24 ; 1032310C v_mad_f32 v24, v24, v12, v25 ; D2820018 04661918 v_mad_f32 v0, -v8, v0, v24 ; D2820000 24620108 v_mad_f32 v8, v17, v21, s11 ; D2820008 002E2B11 v_mad_f32 v21, v22, v17, s12 ; D2820015 00322316 v_mad_f32 v17, v23, v17, s13 ; D2820011 00362317 v_max_f32_e32 v10, 0, v10 ; 20141480 v_mov_b32_e32 v22, 0xb58637bd ; 7E2C02FF B58637BD v_add_f32_e32 v23, v22, v10 ; 062E1516 v_cmp_gt_f32_e32 vcc, 0, v23 ; 7C082E80 v_mad_f32 v2, v16, v2, s9 ; D2820002 00260510 v_mad_f32 v3, v3, v16, s10 ; D2820003 002A2103 v_mov_b32_e32 v23, 0x7fffffff ; 7E2E02FF 7FFFFFFF v_and_b32_e32 v10, v10, v23 ; 36142F0A v_log_f32_e32 v10, v10 ; 7E144F0A v_mad_f32 v16, v18, v16, s8 ; D2820010 00222112 v_mul_f32_e32 v18, v9, v19 ; 10242709 v_mad_f32 v18, v20, v11, v18 ; D2820012 044A1714 v_mul_legacy_f32_e32 v10, s14, v10 ; 0E14140E v_exp_f32_e32 v10, v10 ; 7E144B0A v_cndmask_b32_e64 v10, v10, 0, vcc ; D200000A 01A9010A v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mad_f32 v4, v7, v11, v4 ; D2820004 04121707 v_mad_f32 v7, v12, v1, v18 ; D2820007 044A030C v_mad_f32 v0, v0, v1, v4 ; D2820000 04120300 v_add_f32_e64 v1, 0, v7 clamp ; D2060801 00020E80 v_mul_f32_e32 v4, v1, v8 ; 10081101 v_mul_f32_e32 v7, v1, v21 ; 100E2B01 v_mul_f32_e32 v8, v1, v17 ; 10102301 v_add_f32_e32 v1, v22, v1 ; 06020316 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v9, v0, v23 ; 36122F00 v_log_f32_e32 v9, v9 ; 7E124F09 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v4, 0, vcc ; D2000001 01A90104 v_cndmask_b32_e64 v4, v7, 0, vcc ; D2000004 01A90107 v_cndmask_b32_e64 v7, v8, 0, vcc ; D2000007 01A90108 v_add_f32_e32 v0, v22, v0 ; 06000116 v_mul_legacy_f32_e32 v8, s3, v9 ; 0E101203 v_mov_b32_e32 v9, 0x41000000 ; 7E1202FF 41000000 v_add_f32_e32 v9, s3, v9 ; 06121203 v_exp_f32_e32 v8, v8 ; 7E104B08 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mul_f32_e32 v8, 0x3d22f983, v8 ; 101010FF 3D22F983 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v8, v3 ; 10060708 v_mul_f32_e32 v8, v8, v16 ; 10102108 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v2, 0, vcc ; D2000000 01A90102 v_cndmask_b32_e64 v2, v3, 0, vcc ; D2000002 01A90103 v_cndmask_b32_e64 v3, v8, 0, vcc ; D2000003 01A90108 v_add_f32_e32 v0, v1, v0 ; 06000101 v_add_f32_e32 v1, v4, v2 ; 06020504 v_add_f32_e32 v2, v7, v3 ; 06040707 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mul_f32_e32 v1, v1, v10 ; 10021501 v_mul_f32_e32 v2, v2, v10 ; 10041502 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mul_f32_e32 v1, v1, v14 ; 10021D01 v_mul_f32_e32 v2, v2, v15 ; 10041F02 v_mul_f32_e32 v0, s2, v0 ; 10000002 v_mul_f32_e32 v1, s1, v1 ; 10020201 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_xor_b32_e32 v3, 0x80000000, v6 ; 3A060CFF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cndmask_b32_e64 v1, 0, v2, vcc ; D2000001 01AA0480 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 32 Code Size: 1284 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..9] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.3000, 0.5900, 0.1100, -0.0000} IMM[2] FLT32 { 20.0000, -0.8000, 0.2000, 8.0000} IMM[3] FLT32 { 0.0398, 0.0000, 340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[3].zzzz, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[3].zzzz, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: TEX TEMP[3], IN[0], SAMP[1], 2D 9: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[0].x, TEMP[3], TEMP[3] 11: RSQ TEMP[0].x, TEMP[0].xxxx 12: MIN TEMP[0].x, IMM[3].zzzz, TEMP[0].xxxx 13: MUL TEMP[4].xyz, TEMP[3], TEMP[0].xxxx 14: DP3 TEMP[1].w, TEMP[4], TEMP[1] 15: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[4] 16: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, -TEMP[1] 17: DP3 TEMP[1].w, IN[2], IN[2] 18: RSQ TEMP[0], |TEMP[1].wwww| 19: MIN TEMP[2].w, IMM[3].zzzz, TEMP[0] 20: MUL TEMP[5].xyz, TEMP[2].wwww, IN[2] 21: DP3 TEMP[2].w, TEMP[5], -CONST[10] 22: ADD TEMP[2].w, TEMP[2].wwww, -CONST[11].xxxx 23: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[11].yyyy 24: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 25: ADD TEMP[1].w, -TEMP[1].wwww, IMM[0].zzzz 26: MUL TEMP[5], TEMP[2].wwww, TEMP[1].wwww 27: MOV TEMP[6], TEMP[5] 28: KILL_IF TEMP[6] 29: UIF CONST[240].xxxx :0 30: RCP TEMP[3].w, IN[4].wwww 31: MUL TEMP[5].xy, TEMP[3].wwww, IN[4] 32: MAD TEMP[5].xy, TEMP[5], CONST[1], CONST[1].wzzw 33: TEX TEMP[6], TEMP[5], SAMP[0], 2D 34: ELSE :36 35: MOV TEMP[6].xyz, IMM[0].zzzz 36: ENDIF 37: MOV TEMP[5].z, IMM[0].zzzz 38: ADD TEMP[5].xyz, TEMP[5].zzzz, -CONST[0] 39: TEX TEMP[7], IN[0], SAMP[2], 2D 40: DP3 TEMP[3].w, TEMP[7], IMM[1] 41: LRP TEMP[8].xyz, CONST[7].xxxx, TEMP[3].wwww, TEMP[7] 42: TEX TEMP[9], IN[0], SAMP[3], 2D 43: MAD TEMP[9].xyz, TEMP[9], IMM[0].xxxx, IMM[0].yyyy 44: DP3 TEMP[1].x, TEMP[9], TEMP[1] 45: MAX TEMP[3].w, TEMP[1].xxxx, IMM[0].wwww 46: MOV_SAT TEMP[1].x, TEMP[1].xxxx 47: ADD TEMP[1].y, TEMP[1].xxxx, IMM[1].wwww 48: POW TEMP[4].w, |TEMP[1].xxxx|, CONST[7].yyyy 49: CMP TEMP[1].x, TEMP[1].yyyy, IMM[0].wwww, TEMP[4].wwww 50: MUL TEMP[9].xyz, TEMP[1].xxxx, CONST[6] 51: MAD TEMP[8].xyz, TEMP[8], TEMP[9], -TEMP[7] 52: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[8], TEMP[7] 53: MUL TEMP[1].xyz, TEMP[1], CONST[7].zzzz 54: MUL TEMP[1].xyz, TEMP[5], TEMP[1] 55: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 56: MAX TEMP[4].w, TEMP[1].wwww, IMM[0].wwww 57: ADD TEMP[1].w, TEMP[4].wwww, IMM[1].wwww 58: POW TEMP[5].x, |TEMP[4].wwww|, CONST[9].wwww 59: CMP TEMP[1].w, TEMP[1].wwww, IMM[0].wwww, TEMP[5].xxxx 60: DP3_SAT TEMP[3].x, TEMP[3], TEMP[2] 61: ADD TEMP[3].y, TEMP[3].xxxx, IMM[1].wwww 62: LG2 TEMP[0].x, |TEMP[3].xxxx| 63: MAX TEMP[3].x, IMM[3].wwww, TEMP[0].xxxx 64: MUL TEMP[3].z, TEMP[3].xxxx, IMM[2].xxxx 65: EX2 TEMP[3].z, TEMP[3].zzzz 66: CMP TEMP[3].z, TEMP[3].yyyy, IMM[0].wwww, TEMP[3].zzzz 67: ADD TEMP[4].w, TEMP[3].wwww, IMM[2].yyyy 68: ADD TEMP[3].w, -TEMP[3].wwww, IMM[0].zzzz 69: CMP TEMP[3].w, TEMP[4].wwww, TEMP[3].wwww, IMM[2].zzzz 70: MUL TEMP[3].w, TEMP[3].wwww, TEMP[3].wwww 71: MUL TEMP[3].w, TEMP[3].wwww, TEMP[3].wwww 72: MUL TEMP[3].z, TEMP[3].wwww, TEMP[3].zzzz 73: TEX TEMP[7], IN[0], SAMP[4], 2D 74: MUL TEMP[5].xyz, TEMP[3].zzzz, TEMP[7] 75: MUL TEMP[7].xyz, TEMP[7], CONST[8].xxxx 76: MAD TEMP[5].xyz, CONST[7].wwww, TEMP[5], TEMP[7] 77: MUL TEMP[5].xyz, TEMP[5], CONST[7].zzzz 78: MAD TEMP[5].xyz, TEMP[5], CONST[5].wwww, CONST[5] 79: DP3_SAT TEMP[2].x, TEMP[4], TEMP[2] 80: ADD TEMP[2].y, TEMP[2].xxxx, IMM[1].wwww 81: CMP TEMP[2].x, TEMP[2].yyyy, IMM[0].wwww, TEMP[2].xxxx 82: MUL TEMP[2].y, TEMP[3].xxxx, CONST[8].yyyy 83: EX2 TEMP[2].y, TEMP[2].yyyy 84: MOV TEMP[4].y, CONST[8].yyyy 85: ADD TEMP[2].z, TEMP[4].yyyy, IMM[2].wwww 86: MUL TEMP[2].y, TEMP[2].zzzz, TEMP[2].yyyy 87: MUL TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx 88: CMP TEMP[2].y, TEMP[3].yyyy, IMM[0].wwww, TEMP[2].yyyy 89: MUL TEMP[3].xyz, TEMP[5], TEMP[2].yyyy 90: MAD TEMP[1].xyz, TEMP[1], TEMP[2].xxxx, TEMP[3] 91: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 92: MUL TEMP[1].xyz, TEMP[6], TEMP[1] 93: MUL TEMP[1].xyz, TEMP[1], CONST[9] 94: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 95: CMP OUT[0].xyz, -TEMP[5].wwww, TEMP[1], IMM[0].wwww 96: MOV OUT[0].w, IMM[0].wwww 97: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %54 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %59 = bitcast <8 x i32> addrspace(2)* %58 to <32 x i8> addrspace(2)* %60 = load <32 x i8>, <32 x i8> addrspace(2)* %59, align 32, !tbaa !0 %61 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %62 = bitcast <4 x i32> addrspace(2)* %61 to <16 x i8> addrspace(2)* %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %65 = bitcast <8 x i32> addrspace(2)* %64 to <32 x i8> addrspace(2)* %66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, align 32, !tbaa !0 %67 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %68 = bitcast <4 x i32> addrspace(2)* %67 to <16 x i8> addrspace(2)* %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %71 = bitcast <8 x i32> addrspace(2)* %70 to <32 x i8> addrspace(2)* %72 = load <32 x i8>, <32 x i8> addrspace(2)* %71, align 32, !tbaa !0 %73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %74 = bitcast <4 x i32> addrspace(2)* %73 to <16 x i8> addrspace(2)* %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0 %76 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %77 = bitcast <8 x i32> addrspace(2)* %76 to <32 x i8> addrspace(2)* %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, align 32, !tbaa !0 %79 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %80 = bitcast <4 x i32> addrspace(2)* %79 to <16 x i8> addrspace(2)* %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %93 = fmul float %90, %90 %94 = fmul float %91, %91 %95 = fadd float %94, %93 %96 = fmul float %92, %92 %97 = fadd float %95, %96 %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97) %99 = call float @llvm.minnum.f32(float %98, float 0x47EFFFFFE0000000) %100 = fmul float %90, %99 %101 = fmul float %91, %99 %102 = fmul float %92, %99 %103 = fmul float %84, %84 %104 = fmul float %85, %85 %105 = fadd float %104, %103 %106 = fmul float %86, %86 %107 = fadd float %105, %106 %108 = call float @llvm.AMDGPU.rsq.clamped.f32(float %107) %109 = call float @llvm.minnum.f32(float %108, float 0x47EFFFFFE0000000) %110 = fmul float %84, %109 %111 = fmul float %85, %109 %112 = fmul float %86, %109 %113 = bitcast float %82 to i32 %114 = bitcast float %83 to i32 %115 = insertelement <2 x i32> undef, i32 %113, i32 0 %116 = insertelement <2 x i32> %115, i32 %114, i32 1 %117 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %116, <32 x i8> %60, <16 x i8> %63, i32 2) %118 = extractelement <4 x float> %117, i32 0 %119 = extractelement <4 x float> %117, i32 1 %120 = extractelement <4 x float> %117, i32 2 %121 = fmul float %118, 2.000000e+00 %122 = fadd float %121, -1.000000e+00 %123 = fmul float %119, 2.000000e+00 %124 = fadd float %123, -1.000000e+00 %125 = fmul float %120, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %122, %122 %128 = fmul float %124, %124 %129 = fadd float %128, %127 %130 = fmul float %126, %126 %131 = fadd float %129, %130 %132 = call float @llvm.AMDGPU.rsq.clamped.f32(float %131) %133 = call float @llvm.minnum.f32(float %132, float 0x47EFFFFFE0000000) %134 = fmul float %122, %133 %135 = fmul float %124, %133 %136 = fmul float %126, %133 %137 = fmul float %134, %100 %138 = fmul float %135, %101 %139 = fadd float %138, %137 %140 = fmul float %136, %102 %141 = fadd float %139, %140 %142 = fmul float %141, %134 %143 = fmul float %141, %135 %144 = fmul float %141, %136 %145 = fmul float %142, 2.000000e+00 %146 = fsub float %145, %100 %147 = fmul float %143, 2.000000e+00 %148 = fsub float %147, %101 %149 = fmul float %144, 2.000000e+00 %150 = fsub float %149, %102 %151 = fmul float %87, %87 %152 = fmul float %88, %88 %153 = fadd float %152, %151 %154 = fmul float %89, %89 %155 = fadd float %153, %154 %156 = call float @fabs(float %155) %157 = call float @llvm.AMDGPU.rsq.clamped.f32(float %156) %158 = call float @llvm.minnum.f32(float %157, float 0x47EFFFFFE0000000) %159 = fmul float %158, %87 %160 = fmul float %158, %88 %161 = fmul float %158, %89 %162 = fmul float %48, %159 %163 = fsub float -0.000000e+00, %162 %164 = fmul float %49, %160 %165 = fsub float %163, %164 %166 = fmul float %50, %161 %167 = fsub float %165, %166 %168 = fsub float %167, %51 %169 = fmul float %168, %52 %170 = call float @llvm.AMDIL.clamp.(float %169, float 0.000000e+00, float 1.000000e+00) %171 = fmul float %170, %170 %172 = fsub float 1.000000e+00, %155 %173 = fmul float %171, %172 %174 = fmul float %171, %172 %175 = fmul float %171, %172 %176 = fmul float %171, %172 %177 = fcmp olt float %173, 0.000000e+00 %178 = fcmp olt float %174, 0.000000e+00 %179 = fcmp olt float %175, 0.000000e+00 %180 = fcmp olt float %176, 0.000000e+00 %181 = or i1 %180, %179 %182 = or i1 %181, %178 %183 = or i1 %182, %177 %184 = select i1 %183, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %184) %185 = bitcast float %53 to i32 %186 = icmp eq i32 %185, 0 br i1 %186, label %ENDIF, label %IF IF: ; preds = %main_body %187 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %188 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %189 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %190 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %191 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %192 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %193 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %194 = fdiv float 1.000000e+00, %187 %195 = fmul float %194, %189 %196 = fmul float %194, %188 %197 = fmul float %195, %193 %198 = fadd float %197, %190 %199 = fmul float %196, %192 %200 = fadd float %199, %191 %201 = bitcast float %198 to i32 %202 = bitcast float %200 to i32 %203 = insertelement <2 x i32> undef, i32 %201, i32 0 %204 = insertelement <2 x i32> %203, i32 %202, i32 1 %205 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %204, <32 x i8> %55, <16 x i8> %57, i32 2) %206 = extractelement <4 x float> %205, i32 0 %207 = extractelement <4 x float> %205, i32 1 %208 = extractelement <4 x float> %205, i32 2 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp24.0 = phi float [ %206, %IF ], [ 1.000000e+00, %main_body ] %temp25.0 = phi float [ %207, %IF ], [ 1.000000e+00, %main_body ] %temp26.0 = phi float [ %208, %IF ], [ 1.000000e+00, %main_body ] %209 = fsub float 1.000000e+00, %24 %210 = fsub float 1.000000e+00, %25 %211 = fsub float 1.000000e+00, %26 %212 = bitcast float %82 to i32 %213 = bitcast float %83 to i32 %214 = insertelement <2 x i32> undef, i32 %212, i32 0 %215 = insertelement <2 x i32> %214, i32 %213, i32 1 %216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %66, <16 x i8> %69, i32 2) %217 = extractelement <4 x float> %216, i32 0 %218 = extractelement <4 x float> %216, i32 1 %219 = extractelement <4 x float> %216, i32 2 %220 = fmul float %217, 0x3FD3333340000000 %221 = fmul float %218, 0x3FE2E147A0000000 %222 = fadd float %221, %220 %223 = fmul float %219, 0x3FBC28F5C0000000 %224 = fadd float %222, %223 %225 = call float @llvm.AMDGPU.lrp(float %38, float %224, float %217) %226 = call float @llvm.AMDGPU.lrp(float %38, float %224, float %218) %227 = call float @llvm.AMDGPU.lrp(float %38, float %224, float %219) %228 = bitcast float %82 to i32 %229 = bitcast float %83 to i32 %230 = insertelement <2 x i32> undef, i32 %228, i32 0 %231 = insertelement <2 x i32> %230, i32 %229, i32 1 %232 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %231, <32 x i8> %72, <16 x i8> %75, i32 2) %233 = extractelement <4 x float> %232, i32 0 %234 = extractelement <4 x float> %232, i32 1 %235 = extractelement <4 x float> %232, i32 2 %236 = fmul float %233, 2.000000e+00 %237 = fadd float %236, -1.000000e+00 %238 = fmul float %234, 2.000000e+00 %239 = fadd float %238, -1.000000e+00 %240 = fmul float %235, 2.000000e+00 %241 = fadd float %240, -1.000000e+00 %242 = fmul float %237, %100 %243 = fmul float %239, %101 %244 = fadd float %243, %242 %245 = fmul float %241, %102 %246 = fadd float %244, %245 %247 = call float @llvm.maxnum.f32(float %246, float 0.000000e+00) %248 = call float @llvm.AMDIL.clamp.(float %246, float 0.000000e+00, float 1.000000e+00) %249 = fadd float %248, 0xBEB0C6F7A0000000 %250 = call float @fabs(float %248) %251 = call float @llvm.pow.f32(float %250, float %39) %252 = call float @llvm.AMDGPU.cndlt(float %249, float 0.000000e+00, float %251) %253 = fmul float %252, %35 %254 = fmul float %252, %36 %255 = fmul float %252, %37 %256 = fmul float %225, %253 %257 = fsub float %256, %217 %258 = fmul float %226, %254 %259 = fsub float %258, %218 %260 = fmul float %227, %255 %261 = fsub float %260, %219 %262 = fmul float %252, %257 %263 = fadd float %262, %217 %264 = fmul float %252, %259 %265 = fadd float %264, %218 %266 = fmul float %252, %261 %267 = fadd float %266, %219 %268 = fmul float %263, %40 %269 = fmul float %265, %40 %270 = fmul float %267, %40 %271 = fmul float %209, %268 %272 = fmul float %210, %269 %273 = fmul float %211, %270 %274 = fmul float %271, %30 %275 = fadd float %274, %27 %276 = fmul float %272, %30 %277 = fadd float %276, %28 %278 = fmul float %273, %30 %279 = fadd float %278, %29 %280 = call float @llvm.maxnum.f32(float %172, float 0.000000e+00) %281 = fadd float %280, 0xBEB0C6F7A0000000 %282 = call float @fabs(float %280) %283 = call float @llvm.pow.f32(float %282, float %47) %284 = call float @llvm.AMDGPU.cndlt(float %281, float 0.000000e+00, float %283) %285 = fmul float %146, %110 %286 = fmul float %148, %111 %287 = fadd float %286, %285 %288 = fmul float %150, %112 %289 = fadd float %287, %288 %290 = call float @llvm.AMDIL.clamp.(float %289, float 0.000000e+00, float 1.000000e+00) %291 = fadd float %290, 0xBEB0C6F7A0000000 %292 = call float @fabs(float %290) %293 = call float @llvm.log2.f32(float %292) %294 = call float @llvm.maxnum.f32(float %293, float 0xC7EFFFFFE0000000) %295 = fmul float %294, 2.000000e+01 %296 = call float @llvm.AMDIL.exp.(float %295) %297 = call float @llvm.AMDGPU.cndlt(float %291, float 0.000000e+00, float %296) %298 = fadd float %247, 0xBFE99999A0000000 %299 = fsub float 1.000000e+00, %247 %300 = call float @llvm.AMDGPU.cndlt(float %298, float %299, float 0x3FC9999980000000) %301 = fmul float %300, %300 %302 = fmul float %301, %301 %303 = fmul float %302, %297 %304 = bitcast float %82 to i32 %305 = bitcast float %83 to i32 %306 = insertelement <2 x i32> undef, i32 %304, i32 0 %307 = insertelement <2 x i32> %306, i32 %305, i32 1 %308 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %307, <32 x i8> %78, <16 x i8> %81, i32 2) %309 = extractelement <4 x float> %308, i32 0 %310 = extractelement <4 x float> %308, i32 1 %311 = extractelement <4 x float> %308, i32 2 %312 = fmul float %303, %309 %313 = fmul float %303, %310 %314 = fmul float %303, %311 %315 = fmul float %309, %42 %316 = fmul float %310, %42 %317 = fmul float %311, %42 %318 = fmul float %41, %312 %319 = fadd float %318, %315 %320 = fmul float %41, %313 %321 = fadd float %320, %316 %322 = fmul float %41, %314 %323 = fadd float %322, %317 %324 = fmul float %319, %40 %325 = fmul float %321, %40 %326 = fmul float %323, %40 %327 = fmul float %324, %34 %328 = fadd float %327, %31 %329 = fmul float %325, %34 %330 = fadd float %329, %32 %331 = fmul float %326, %34 %332 = fadd float %331, %33 %333 = fmul float %134, %110 %334 = fmul float %135, %111 %335 = fadd float %334, %333 %336 = fmul float %136, %112 %337 = fadd float %335, %336 %338 = call float @llvm.AMDIL.clamp.(float %337, float 0.000000e+00, float 1.000000e+00) %339 = fadd float %338, 0xBEB0C6F7A0000000 %340 = call float @llvm.AMDGPU.cndlt(float %339, float 0.000000e+00, float %338) %341 = fmul float %294, %43 %342 = call float @llvm.AMDIL.exp.(float %341) %343 = fadd float %43, 8.000000e+00 %344 = fmul float %343, %342 %345 = fmul float %344, 0x3FA45F3060000000 %346 = call float @llvm.AMDGPU.cndlt(float %291, float 0.000000e+00, float %345) %347 = fmul float %328, %346 %348 = fmul float %330, %346 %349 = fmul float %332, %346 %350 = fmul float %275, %340 %351 = fadd float %350, %347 %352 = fmul float %277, %340 %353 = fadd float %352, %348 %354 = fmul float %279, %340 %355 = fadd float %354, %349 %356 = fmul float %284, %351 %357 = fmul float %284, %353 %358 = fmul float %284, %355 %359 = fmul float %temp24.0, %356 %360 = fmul float %temp25.0, %357 %361 = fmul float %temp26.0, %358 %362 = fmul float %359, %44 %363 = fmul float %360, %45 %364 = fmul float %361, %46 %365 = fmul float %171, %362 %366 = fmul float %171, %363 %367 = fmul float %171, %364 %368 = fsub float -0.000000e+00, %176 %369 = call float @llvm.AMDGPU.cndlt(float %368, float %365, float 0.000000e+00) %370 = fsub float -0.000000e+00, %176 %371 = call float @llvm.AMDGPU.cndlt(float %370, float %366, float 0.000000e+00) %372 = fsub float -0.000000e+00, %176 %373 = call float @llvm.AMDGPU.cndlt(float %372, float %367, float 0.000000e+00) %374 = call i32 @llvm.SI.packf16(float %369, float %371) %375 = bitcast i32 %374 to float %376 = call i32 @llvm.SI.packf16(float %373, float 0.000000e+00) %377 = bitcast i32 %376 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %375, float %377, float %375, float %377) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v15, v0, 0, 1, [m0] ; C83C0400 v_interp_p2_f32 v15, [v15], v1, 0, 1, [m0] ; C83D0401 v_interp_p1_f32 v16, v0, 1, 1, [m0] ; C8400500 v_interp_p2_f32 v16, [v16], v1, 1, 1, [m0] ; C8410501 v_interp_p1_f32 v17, v0, 2, 1, [m0] ; C8440600 v_interp_p2_f32 v17, [v17], v1, 2, 1, [m0] ; C8450601 v_interp_p1_f32 v4, v0, 0, 2, [m0] ; C8100800 v_interp_p2_f32 v4, [v4], v1, 0, 2, [m0] ; C8110801 v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 v_interp_p1_f32 v6, v0, 2, 2, [m0] ; C8180A00 v_interp_p2_f32 v6, [v6], v1, 2, 2, [m0] ; C8190A01 v_interp_p1_f32 v7, v0, 0, 3, [m0] ; C81C0C00 s_load_dwordx4 s[24:27], s[2:3], 0x0 ; C08C0300 v_interp_p2_f32 v7, [v7], v1, 0, 3, [m0] ; C81D0C01 v_interp_p1_f32 v8, v0, 1, 3, [m0] ; C8200D00 v_interp_p2_f32 v8, [v8], v1, 1, 3, [m0] ; C8210D01 v_interp_p1_f32 v9, v0, 2, 3, [m0] ; C8240E00 v_interp_p2_f32 v9, [v9], v1, 2, 3, [m0] ; C8250E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430A02 v_mul_f32_e32 v13, v4, v4 ; 101A0904 v_mad_f32 v13, v5, v5, v13 ; D282000D 04360B05 v_mad_f32 v13, v6, v6, v13 ; D282000D 04360D06 s_buffer_load_dword s0, s[24:27], 0x28 ; C2001928 s_buffer_load_dword s1, s[24:27], 0x29 ; C2009929 s_buffer_load_dword s2, s[24:27], 0x2a ; C201192A s_buffer_load_dword s3, s[24:27], 0x2c ; C201992C s_buffer_load_dword s8, s[24:27], 0x2d ; C204192D v_rsq_clamp_f32_e64 v14, |v13| ; D358010E 0000010D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, 2.0, v10, -1.0 ; D2820012 03CE14F4 v_mad_f32 v19, 2.0, v11, -1.0 ; D2820013 03CE16F4 v_mad_f32 v20, 2.0, v12, -1.0 ; D2820014 03CE18F4 v_min_f32_e32 v10, 0x7f7fffff, v14 ; 1E141CFF 7F7FFFFF v_mul_f32_e32 v4, v4, v10 ; 10081504 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s0, v4 ; 10080800 v_mul_f32_e32 v5, v5, v10 ; 100A1505 v_mad_f32 v4, -s1, v5, -v4 ; D2820004 A4120A01 v_mul_f32_e32 v5, v6, v10 ; 100A1506 v_mad_f32 v4, -s2, v5, v4 ; D2820004 24120A02 v_subrev_f32_e32 v4, s3, v4 ; 0A080803 v_mul_f32_e32 v4, s8, v4 ; 10080808 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_sub_f32_e32 v6, 1.0, v13 ; 080C1AF2 v_mul_f32_e32 v5, v6, v4 ; 100A0906 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v10, 0, -1.0, vcc ; D200000A 01A9E680 v_cndmask_b32_e64 v10, v10, -1.0, vcc ; D200000A 01A9E70A v_cndmask_b32_e64 v10, v10, -1.0, vcc ; D200000A 01A9E70A v_cndmask_b32_e64 v11, v10, -1.0, vcc ; D200000B 01A9E70A s_movk_i32 s0, 0xf00 ; B0000F00 s_buffer_load_dword s28, s[24:27], s0 ; C20E1800 v_mul_f32_e32 v10, v7, v7 ; 10140F07 v_mad_f32 v10, v8, v8, v10 ; D282000A 042A1108 v_mad_f32 v10, v9, v9, v10 ; D282000A 042A1309 v_rsq_clamp_f32_e32 v21, v10 ; 7E2A590A v_mul_f32_e32 v10, v15, v15 ; 10141F0F v_mad_f32 v10, v16, v16, v10 ; D282000A 042A2110 v_mad_f32 v10, v17, v17, v10 ; D282000A 042A2311 v_rsq_clamp_f32_e32 v22, v10 ; 7E2C590A s_buffer_load_dword s8, s[24:27], 0x24 ; C2041924 s_buffer_load_dword s3, s[24:27], 0x25 ; C2019925 s_buffer_load_dword s2, s[24:27], 0x26 ; C2011926 s_buffer_load_dword s10, s[24:27], 0x27 ; C2051927 s_buffer_load_dword s19, s[24:27], 0x0 ; C2099900 s_buffer_load_dword s18, s[24:27], 0x1 ; C2091901 s_buffer_load_dword s16, s[24:27], 0x2 ; C2081902 s_buffer_load_dword s0, s[24:27], 0x10 ; C2001910 s_buffer_load_dword s1, s[24:27], 0x11 ; C2009911 s_buffer_load_dword s15, s[24:27], 0x12 ; C2079912 s_buffer_load_dword s29, s[24:27], 0x13 ; C20E9913 s_buffer_load_dword s11, s[24:27], 0x14 ; C2059914 s_buffer_load_dword s12, s[24:27], 0x15 ; C2061915 s_buffer_load_dword s13, s[24:27], 0x16 ; C2069916 s_buffer_load_dword s30, s[24:27], 0x17 ; C20F1917 s_buffer_load_dword s23, s[24:27], 0x18 ; C20B9918 s_buffer_load_dword s22, s[24:27], 0x19 ; C20B1919 s_buffer_load_dword s21, s[24:27], 0x1a ; C20A991A s_buffer_load_dword s37, s[24:27], 0x1c ; C212991C s_buffer_load_dword s36, s[24:27], 0x1d ; C212191D s_buffer_load_dword s14, s[24:27], 0x1e ; C207191E s_buffer_load_dword s17, s[24:27], 0x1f ; C208991F s_buffer_load_dword s20, s[24:27], 0x20 ; C20A1920 s_buffer_load_dword s9, s[24:27], 0x21 ; C2049921 v_mul_f32_e32 v10, v18, v18 ; 10142512 v_mad_f32 v10, v19, v19, v10 ; D282000A 042A2713 v_mad_f32 v10, v20, v20, v10 ; D282000A 042A2914 v_rsq_clamp_f32_e32 v23, v10 ; 7E2E590A v_mov_b32_e32 v10, 1.0 ; 7E1402F2 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v11, 1.0 ; 7E1602F2 v_mov_b32_e32 v12, 1.0 ; 7E1802F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ne_i32_e64 s[32:33], 0, s28 ; D10A0020 00003880 v_mov_b32_e32 v14, s29 ; 7E1C021D v_mov_b32_e32 v13, s30 ; 7E1A021E s_and_saveexec_b64 s[28:29], s[32:33] ; BE9C2420 s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 v_interp_p1_f32 v10, v0, 3, 4, [m0] ; C8281300 s_buffer_load_dword s30, s[24:27], 0x6 ; C20F1906 s_buffer_load_dword s31, s[24:27], 0x7 ; C20F9907 s_buffer_load_dword s38, s[24:27], 0x4 ; C2131904 s_buffer_load_dword s39, s[24:27], 0x5 ; C2139905 v_interp_p2_f32 v10, [v10], v1, 3, 4, [m0] ; C8291301 v_interp_p1_f32 v11, v0, 1, 4, [m0] ; C82C1100 v_rcp_f32_e32 v10, v10 ; 7E14550A v_interp_p2_f32 v11, [v11], v1, 1, 4, [m0] ; C82D1101 v_interp_p1_f32 v0, v0, 0, 4, [m0] ; C8001000 v_interp_p2_f32 v0, [v0], v1, 0, 4, [m0] ; C8011001 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mul_f32_e32 v1, v11, v10 ; 1002150B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v10, s31 ; 7E14021F v_mad_f32 v10, s38, v0, v10 ; D282000A 042A0026 v_mov_b32_e32 v0, s30 ; 7E00021E v_mad_f32 v11, s39, v1, v0 ; D282000B 04020227 image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[40:47], s[32:35] ; F0800700 010A0A0A s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[28:29] ; 88FE1C7E v_min_f32_e32 v0, 0x7f7fffff, v21 ; 1E002AFF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v22 ; 1E022CFF 7F7FFFFF v_mul_f32_e32 v15, v1, v15 ; 101E1F01 v_mul_f32_e32 v16, v1, v16 ; 10202101 v_mul_f32_e32 v1, v1, v17 ; 10022301 v_min_f32_e32 v17, 0x7f7fffff, v23 ; 1E222EFF 7F7FFFFF v_mul_f32_e32 v18, v17, v18 ; 10242511 v_mul_f32_e32 v19, v17, v19 ; 10262711 v_mul_f32_e32 v17, v17, v20 ; 10222911 v_mul_f32_e32 v20, v0, v7 ; 10280F00 v_sub_f32_e64 v21, 1.0, s37 ; D2080015 00004AF2 s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508 s_load_dwordx4 s[44:47], s[4:5], 0xc ; C096050C s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 s_load_dwordx8 s[48:55], s[6:7], 0x10 ; C0D80710 s_load_dwordx8 s[56:63], s[6:7], 0x18 ; C0DC0718 s_load_dwordx8 s[28:35], s[6:7], 0x20 ; C0CE0720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[40:43] ; F0800700 014C1602 image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[56:63], s[44:47] ; F0800700 016E1902 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v25, 2.0, v25, -1.0 ; D2820019 03CE32F4 v_mad_f32 v26, 2.0, v26, -1.0 ; D282001A 03CE34F4 v_mad_f32 v27, 2.0, v27, -1.0 ; D282001B 03CE36F4 v_mul_f32_e32 v28, 0x3e99999a, v22 ; 10382CFF 3E99999A v_madmk_f32_e32 v28, v23, v28, 0x3f170a3d ; 40383917 3F170A3D v_madmk_f32_e32 v28, v24, v28, 0x3de147ae ; 40383918 3DE147AE v_mul_f32_e32 v29, v22, v21 ; 103A2B16 v_mad_f32 v29, s37, v28, v29 ; D282001D 04763825 v_mul_f32_e32 v30, v23, v21 ; 103C2B17 v_mad_f32 v30, s37, v28, v30 ; D282001E 047A3825 v_mul_f32_e32 v21, v24, v21 ; 102A2B18 v_mad_f32 v21, s37, v28, v21 ; D2820015 04563825 v_mul_f32_e32 v25, v20, v25 ; 10323314 v_mul_f32_e32 v28, v0, v8 ; 10381100 v_mad_f32 v25, v26, v28, v25 ; D2820019 0466391A v_mul_f32_e32 v26, v0, v9 ; 10341300 v_mad_f32 v25, v27, v26, v25 ; D2820019 0466351B v_add_f32_e64 v27, 0, v25 clamp ; D206081B 00023280 v_mov_b32_e32 v31, 0x7fffffff ; 7E3E02FF 7FFFFFFF v_and_b32_e32 v32, v27, v31 ; 36403F1B v_log_f32_e32 v32, v32 ; 7E404F20 v_mov_b32_e32 v33, 0xb58637bd ; 7E4202FF B58637BD v_add_f32_e32 v27, v33, v27 ; 06363721 v_cmp_gt_f32_e32 vcc, 0, v27 ; 7C083680 v_mul_legacy_f32_e32 v27, s36, v32 ; 0E364024 v_exp_f32_e32 v27, v27 ; 7E364B1B v_cndmask_b32_e64 v27, v27, 0, vcc ; D200001B 01A9011B v_mul_f32_e32 v32, s23, v27 ; 10403617 v_mad_f32 v29, v29, v32, -v22 ; D282001D 845A411D v_mul_f32_e32 v32, s22, v27 ; 10403616 v_mad_f32 v30, v30, v32, -v23 ; D282001E 845E411E v_mul_f32_e32 v32, s21, v27 ; 10403615 v_mad_f32 v21, v21, v32, -v24 ; D2820015 84624115 v_mad_f32 v22, v27, v29, v22 ; D2820016 045A3B1B v_mad_f32 v23, v27, v30, v23 ; D2820017 045E3D1B v_mad_f32 v21, v27, v21, v24 ; D2820015 04622B1B v_mul_f32_e32 v20, v20, v18 ; 10282514 v_mad_f32 v20, v19, v28, v20 ; D2820014 04523913 v_mad_f32 v20, v17, v26, v20 ; D2820014 04523511 v_mul_f32_e32 v24, v18, v20 ; 10302912 v_mad_f32 v24, v20, v18, v24 ; D2820018 04622514 v_mad_f32 v7, -v7, v0, v24 ; D2820007 24620107 v_mul_f32_e32 v24, v19, v20 ; 10302913 v_mad_f32 v24, v20, v19, v24 ; D2820018 04622714 v_mad_f32 v8, -v8, v0, v24 ; D2820008 24620108 v_mul_f32_e32 v24, v17, v20 ; 10302911 v_mad_f32 v20, v20, v17, v24 ; D2820014 04622314 v_mad_f32 v0, -v9, v0, v20 ; D2820000 24520109 v_mul_f32_e32 v7, v15, v7 ; 100E0F0F v_mad_f32 v7, v8, v16, v7 ; D2820007 041E2108 v_mad_f32 v0, v0, v1, v7 ; D2820000 041E0300 v_max_f32_e32 v7, 0, v25 ; 200E3280 v_mov_b32_e32 v8, 0xbf4ccccd ; 7E1002FF BF4CCCCD v_add_f32_e32 v8, v7, v8 ; 06101107 v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v7, 1.0, v7 ; 080E0EF2 v_log_f32_e64 v8, |v0| ; D34E0108 00000100 v_mov_b32_e32 v9, 0x3e4ccccc ; 7E1202FF 3E4CCCCC v_cndmask_b32_e64 v7, v9, v7, vcc ; D2000007 01AA0F09 v_mov_b32_e32 v9, 0xff7fffff ; 7E1202FF FF7FFFFF v_max_f32_e32 v8, v8, v9 ; 20101308 v_add_f32_e32 v0, v33, v0 ; 06000121 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_mul_f32_e32 v0, 0x41a00000, v8 ; 100010FF 41A00000 v_exp_f32_e32 v0, v0 ; 7E004B00 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_mul_f32_e32 v0, v0, v7 ; 10000F00 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[24:27] ; F0800700 00C71802 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v24, v0 ; 10040118 v_mul_f32_e32 v3, v25, v0 ; 10060119 v_mul_f32_e32 v0, v26, v0 ; 1000011A v_mul_f32_e32 v7, s20, v24 ; 100E3014 v_mul_f32_e32 v9, s20, v25 ; 10123214 v_mul_f32_e32 v20, s20, v26 ; 10283414 v_mul_f32_e32 v22, s14, v22 ; 102C2C0E v_sub_f32_e64 v24, 1.0, s19 ; D2080018 000026F2 v_mul_f32_e32 v22, v22, v24 ; 102C3116 v_mul_f32_e32 v23, s14, v23 ; 102E2E0E v_sub_f32_e64 v24, 1.0, s18 ; D2080018 000024F2 v_mul_f32_e32 v23, v23, v24 ; 102E3117 v_mul_f32_e32 v21, s14, v21 ; 102A2A0E v_sub_f32_e64 v24, 1.0, s16 ; D2080018 000020F2 v_mul_f32_e32 v21, v21, v24 ; 102A3115 v_mad_f32 v22, v14, v22, s0 ; D2820016 00022D0E v_mad_f32 v23, v23, v14, s1 ; D2820017 00061D17 v_mad_f32 v14, v21, v14, s15 ; D282000E 003E1D15 v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_and_b32_e32 v21, v6, v31 ; 362A3F06 v_add_f32_e32 v6, v33, v6 ; 060C0D21 v_log_f32_e32 v21, v21 ; 7E2A4F15 v_cmp_gt_f32_e64 s[0:1], 0, v6 ; D0080000 00020C80 v_mad_f32 v2, s17, v2, v7 ; D2820002 041E0411 v_mad_f32 v3, s17, v3, v9 ; D2820003 04260611 v_mul_legacy_f32_e32 v6, s10, v21 ; 0E0C2A0A v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_cndmask_b32_e64 v6, v6, 0, s[0:1] ; D2000006 00010106 v_mad_f32 v0, s17, v0, v20 ; D2820000 04520011 v_mul_f32_e32 v2, s14, v2 ; 1004040E v_mul_f32_e32 v3, s14, v3 ; 1006060E v_mul_f32_e32 v0, s14, v0 ; 1000000E v_mad_f32 v2, v13, v2, s11 ; D2820002 002E050D v_mad_f32 v3, v3, v13, s12 ; D2820003 00321B03 v_mad_f32 v0, v0, v13, s13 ; D2820000 00361B00 v_mul_f32_e32 v7, v15, v18 ; 100E250F v_mad_f32 v7, v19, v16, v7 ; D2820007 041E2113 v_mad_f32 v1, v17, v1, v7 ; D2820001 041E0311 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_add_f32_e32 v7, v33, v1 ; 060E0321 v_cmp_gt_f32_e64 s[0:1], 0, v7 ; D0080000 00020E80 v_cndmask_b32_e64 v1, v1, 0, s[0:1] ; D2000001 00010101 v_mul_f32_e32 v7, s9, v8 ; 100E1009 v_mov_b32_e32 v8, 0x41000000 ; 7E1002FF 41000000 v_add_f32_e32 v8, s9, v8 ; 06101009 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_mul_f32_e32 v7, 0x3d22f983, v7 ; 100E0EFF 3D22F983 v_cndmask_b32_e64 v7, v7, 0, vcc ; D2000007 01A90107 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_mad_f32 v2, v22, v1, v2 ; D2820002 040A0316 v_mad_f32 v3, v23, v1, v3 ; D2820003 040E0317 v_mad_f32 v0, v14, v1, v0 ; D2820000 0402030E v_mul_f32_e32 v1, v2, v6 ; 10020D02 v_mul_f32_e32 v2, v3, v6 ; 10040D03 v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mul_f32_e32 v1, v1, v10 ; 10021501 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_mul_f32_e32 v1, s8, v1 ; 10020208 v_mul_f32_e32 v2, s3, v2 ; 10040403 v_mul_f32_e32 v0, s2, v0 ; 10000002 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_xor_b32_e32 v3, 0x80000000, v5 ; 3A060AFF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cndmask_b32_e64 v2, 0, v2, vcc ; D2000002 01AA0480 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 36 Code Size: 1632 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[4], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL IN[4], TEXCOORD[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL CONST[0..240] DCL TEMP[0], LOCAL DCL TEMP[1..8] IMM[0] FLT32 { -0.0000, 8.0000, 0.0398, 0.0000} IMM[1] FLT32 { 2.0000, -1.0000, -2.0000, -0.0000} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3], IN[3] 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[3], TEMP[0].xxxx 4: DP3 TEMP[0].x, IN[1], IN[1] 5: RSQ TEMP[0].x, TEMP[0].xxxx 6: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 7: MUL TEMP[2].xyz, IN[1], TEMP[0].xxxx 8: TEX TEMP[3], IN[0], SAMP[1], 2D 9: MAD TEMP[4].xyz, TEMP[3], IMM[1].xxxx, IMM[1].yyyy 10: TEX TEMP[5], IN[0], SAMP[2], 2D 11: MAD TEMP[3].xyz, TEMP[3], IMM[1].xxxx, IMM[1].yyzw 12: MAD TEMP[3].xyz, TEMP[5].xxxx, TEMP[3], -IMM[1].wwyw 13: DP3 TEMP[0].x, TEMP[3], TEMP[3] 14: RSQ TEMP[0].x, TEMP[0].xxxx 15: MIN TEMP[0].x, IMM[2].xxxx, TEMP[0].xxxx 16: MUL TEMP[6].xyz, TEMP[3], TEMP[0].xxxx 17: DP3 TEMP[1].w, TEMP[6], TEMP[1] 18: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[6] 19: MAD TEMP[3].xyz, TEMP[3], IMM[1].xxxx, -TEMP[1] 20: DP3 TEMP[1].w, IN[2], IN[2] 21: RSQ TEMP[0], |TEMP[1].wwww| 22: MIN TEMP[2].w, IMM[2].xxxx, TEMP[0] 23: MUL TEMP[7].xyz, TEMP[2].wwww, IN[2] 24: DP3 TEMP[2].w, TEMP[7], -CONST[9] 25: ADD TEMP[2].w, TEMP[2].wwww, -CONST[10].xxxx 26: MUL_SAT TEMP[2].w, TEMP[2].wwww, CONST[10].yyyy 27: MUL TEMP[2].w, TEMP[2].wwww, TEMP[2].wwww 28: ADD TEMP[1].w, -TEMP[1].wwww, -IMM[1].yyyy 29: MUL TEMP[7], TEMP[2].wwww, TEMP[1].wwww 30: MOV TEMP[8], TEMP[7] 31: KILL_IF TEMP[8] 32: UIF CONST[240].xxxx :0 33: RCP TEMP[3].w, IN[4].wwww 34: MUL TEMP[5].zw, TEMP[3].wwww, IN[4].xyxy 35: MAD TEMP[5].zw, TEMP[5], CONST[1].xyxy, CONST[1].xywz 36: TEX TEMP[8], TEMP[5].zwzw, SAMP[0], 2D 37: ELSE :39 38: MOV TEMP[8].xyz, -IMM[1].yyyy 39: ENDIF 40: MOV TEMP[7].y, IMM[1].yyyy 41: ADD TEMP[7].xyz, -TEMP[7].yyyy, -CONST[0] 42: DP3_SAT TEMP[1].x, TEMP[1], TEMP[4] 43: ADD TEMP[1].y, TEMP[1].xxxx, IMM[0].xxxx 44: MUL TEMP[1].z, TEMP[1].xxxx, TEMP[1].xxxx 45: MUL TEMP[1].x, TEMP[1].zzzz, TEMP[1].xxxx 46: TEX TEMP[4], TEMP[3], SAMP[4], CUBE 47: MUL TEMP[4].xyz, TEMP[4], TEMP[5].xxxx 48: MUL TEMP[4].xyz, TEMP[4], CONST[6].xxxx 49: MUL TEMP[4].xyz, TEMP[1].xxxx, TEMP[4] 50: MUL TEMP[4].xyz, TEMP[4], TEMP[5].yyyy 51: CMP TEMP[1].xyz, TEMP[1].yyyy, -IMM[1].wwww, TEMP[4] 52: TEX TEMP[4], IN[0], SAMP[3], 2D 53: MAD TEMP[1].xyz, CONST[6].yyyy, TEMP[4], TEMP[1] 54: MUL TEMP[1].xyz, TEMP[1], CONST[6].zzzz 55: MUL TEMP[1].xyz, TEMP[7], TEMP[1] 56: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 57: MAX TEMP[3].w, TEMP[1].wwww, -IMM[1].wwww 58: ADD TEMP[1].w, TEMP[3].wwww, IMM[0].xxxx 59: POW TEMP[4].x, |TEMP[3].wwww|, CONST[8].wwww 60: CMP TEMP[1].w, TEMP[1].wwww, -IMM[1].wwww, TEMP[4].xxxx 61: MUL TEMP[3].w, TEMP[5].yyyy, CONST[6].wwww 62: MAD TEMP[4].xyz, TEMP[3].wwww, CONST[5].wwww, CONST[5] 63: DP3_SAT TEMP[3].w, TEMP[6], TEMP[2] 64: ADD TEMP[4].w, TEMP[3].wwww, IMM[0].xxxx 65: DP3_SAT TEMP[2].x, TEMP[3], TEMP[2] 66: ADD TEMP[2].y, TEMP[2].xxxx, IMM[0].xxxx 67: POW TEMP[3].x, |TEMP[2].xxxx|, CONST[7].xxxx 68: MOV TEMP[3].y, IMM[0].yyyy 69: ADD TEMP[2].x, TEMP[3].yyyy, CONST[7].xxxx 70: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 71: MUL TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz 72: MUL TEMP[1].xyz, TEMP[1], TEMP[3].wwww 73: CMP TEMP[1].xyz, TEMP[4].wwww, -IMM[1].wwww, TEMP[1] 74: MUL TEMP[3].xyz, TEMP[4], TEMP[2].xxxx 75: CMP TEMP[2].xyz, TEMP[2].yyyy, -IMM[1].wwww, TEMP[3] 76: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 77: MUL TEMP[1].xyz, TEMP[1].wwww, TEMP[1] 78: MUL TEMP[1].xyz, TEMP[8], TEMP[1] 79: MUL TEMP[1].xyz, TEMP[1], CONST[8] 80: MUL TEMP[1].xyz, TEMP[2].wwww, TEMP[1] 81: CMP OUT[0].xyz, -TEMP[7].wwww, TEMP[1], -IMM[1].wwww 82: MOV OUT[0].w, -IMM[1].wwww 83: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 3840) %50 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %55 = bitcast <8 x i32> addrspace(2)* %54 to <32 x i8> addrspace(2)* %56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %58 = bitcast <4 x i32> addrspace(2)* %57 to <16 x i8> addrspace(2)* %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %61 = bitcast <8 x i32> addrspace(2)* %60 to <32 x i8> addrspace(2)* %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %64 = bitcast <4 x i32> addrspace(2)* %63 to <16 x i8> addrspace(2)* %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 %66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %67 = bitcast <8 x i32> addrspace(2)* %66 to <32 x i8> addrspace(2)* %68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %70 = bitcast <4 x i32> addrspace(2)* %69 to <16 x i8> addrspace(2)* %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %73 = bitcast <8 x i32> addrspace(2)* %72 to <32 x i8> addrspace(2)* %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, align 32, !tbaa !0 %75 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %76 = bitcast <4 x i32> addrspace(2)* %75 to <16 x i8> addrspace(2)* %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %89 = fmul float %86, %86 %90 = fmul float %87, %87 %91 = fadd float %90, %89 %92 = fmul float %88, %88 %93 = fadd float %91, %92 %94 = call float @llvm.AMDGPU.rsq.clamped.f32(float %93) %95 = call float @llvm.minnum.f32(float %94, float 0x47EFFFFFE0000000) %96 = fmul float %86, %95 %97 = fmul float %87, %95 %98 = fmul float %88, %95 %99 = fmul float %80, %80 %100 = fmul float %81, %81 %101 = fadd float %100, %99 %102 = fmul float %82, %82 %103 = fadd float %101, %102 %104 = call float @llvm.AMDGPU.rsq.clamped.f32(float %103) %105 = call float @llvm.minnum.f32(float %104, float 0x47EFFFFFE0000000) %106 = fmul float %80, %105 %107 = fmul float %81, %105 %108 = fmul float %82, %105 %109 = bitcast float %78 to i32 %110 = bitcast float %79 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %56, <16 x i8> %59, i32 2) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = fmul float %114, 2.000000e+00 %119 = fadd float %118, -1.000000e+00 %120 = fmul float %115, 2.000000e+00 %121 = fadd float %120, -1.000000e+00 %122 = fmul float %116, 2.000000e+00 %123 = fadd float %122, -1.000000e+00 %124 = bitcast float %78 to i32 %125 = bitcast float %79 to i32 %126 = insertelement <2 x i32> undef, i32 %124, i32 0 %127 = insertelement <2 x i32> %126, i32 %125, i32 1 %128 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %127, <32 x i8> %62, <16 x i8> %65, i32 2) %129 = extractelement <4 x float> %128, i32 0 %130 = extractelement <4 x float> %128, i32 1 %131 = fmul float %114, 2.000000e+00 %132 = fadd float %131, -1.000000e+00 %133 = fmul float %115, 2.000000e+00 %134 = fadd float %133, -1.000000e+00 %135 = fmul float %116, 2.000000e+00 %136 = fadd float %135, -2.000000e+00 %137 = fmul float %129, %132 %138 = fadd float %137, 0.000000e+00 %139 = fmul float %129, %134 %140 = fadd float %139, 0.000000e+00 %141 = fmul float %129, %136 %142 = fadd float %141, 1.000000e+00 %143 = fmul float %138, %138 %144 = fmul float %140, %140 %145 = fadd float %144, %143 %146 = fmul float %142, %142 %147 = fadd float %145, %146 %148 = call float @llvm.AMDGPU.rsq.clamped.f32(float %147) %149 = call float @llvm.minnum.f32(float %148, float 0x47EFFFFFE0000000) %150 = fmul float %138, %149 %151 = fmul float %140, %149 %152 = fmul float %142, %149 %153 = fmul float %150, %96 %154 = fmul float %151, %97 %155 = fadd float %154, %153 %156 = fmul float %152, %98 %157 = fadd float %155, %156 %158 = fmul float %157, %150 %159 = fmul float %157, %151 %160 = fmul float %157, %152 %161 = fmul float %158, 2.000000e+00 %162 = fsub float %161, %96 %163 = fmul float %159, 2.000000e+00 %164 = fsub float %163, %97 %165 = fmul float %160, 2.000000e+00 %166 = fsub float %165, %98 %167 = fmul float %83, %83 %168 = fmul float %84, %84 %169 = fadd float %168, %167 %170 = fmul float %85, %85 %171 = fadd float %169, %170 %172 = call float @fabs(float %171) %173 = call float @llvm.AMDGPU.rsq.clamped.f32(float %172) %174 = call float @llvm.minnum.f32(float %173, float 0x47EFFFFFE0000000) %175 = fmul float %174, %83 %176 = fmul float %174, %84 %177 = fmul float %174, %85 %178 = fmul float %44, %175 %179 = fsub float -0.000000e+00, %178 %180 = fmul float %45, %176 %181 = fsub float %179, %180 %182 = fmul float %46, %177 %183 = fsub float %181, %182 %184 = fsub float %183, %47 %185 = fmul float %184, %48 %186 = call float @llvm.AMDIL.clamp.(float %185, float 0.000000e+00, float 1.000000e+00) %187 = fmul float %186, %186 %188 = fsub float 1.000000e+00, %171 %189 = fmul float %187, %188 %190 = fmul float %187, %188 %191 = fmul float %187, %188 %192 = fmul float %187, %188 %193 = fcmp olt float %189, 0.000000e+00 %194 = fcmp olt float %190, 0.000000e+00 %195 = fcmp olt float %191, 0.000000e+00 %196 = fcmp olt float %192, 0.000000e+00 %197 = or i1 %196, %195 %198 = or i1 %197, %194 %199 = or i1 %198, %193 %200 = select i1 %199, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %200) %201 = bitcast float %49 to i32 %202 = icmp eq i32 %201, 0 br i1 %202, label %ENDIF, label %IF IF: ; preds = %main_body %203 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %204 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %205 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %206 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %207 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %208 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %209 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %210 = fdiv float 1.000000e+00, %203 %211 = fmul float %210, %205 %212 = fmul float %210, %204 %213 = fmul float %211, %209 %214 = fadd float %213, %206 %215 = fmul float %212, %208 %216 = fadd float %215, %207 %217 = bitcast float %214 to i32 %218 = bitcast float %216 to i32 %219 = insertelement <2 x i32> undef, i32 %217, i32 0 %220 = insertelement <2 x i32> %219, i32 %218, i32 1 %221 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %220, <32 x i8> %51, <16 x i8> %53, i32 2) %222 = extractelement <4 x float> %221, i32 0 %223 = extractelement <4 x float> %221, i32 1 %224 = extractelement <4 x float> %221, i32 2 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp15.0 = phi float [ %210, %IF ], [ %117, %main_body ] %temp32.0 = phi float [ %222, %IF ], [ 1.000000e+00, %main_body ] %temp33.0 = phi float [ %223, %IF ], [ 1.000000e+00, %main_body ] %temp34.0 = phi float [ %224, %IF ], [ 1.000000e+00, %main_body ] %225 = fsub float 1.000000e+00, %24 %226 = fsub float 1.000000e+00, %25 %227 = fsub float 1.000000e+00, %26 %228 = fmul float %96, %119 %229 = fmul float %97, %121 %230 = fadd float %229, %228 %231 = fmul float %98, %123 %232 = fadd float %230, %231 %233 = call float @llvm.AMDIL.clamp.(float %232, float 0.000000e+00, float 1.000000e+00) %234 = fadd float %233, 0xBEB0C6F7A0000000 %235 = fmul float %233, %233 %236 = fmul float %235, %233 %237 = insertelement <4 x float> undef, float %162, i32 0 %238 = insertelement <4 x float> %237, float %164, i32 1 %239 = insertelement <4 x float> %238, float %166, i32 2 %240 = insertelement <4 x float> %239, float %temp15.0, i32 3 %241 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %240) %242 = extractelement <4 x float> %241, i32 0 %243 = extractelement <4 x float> %241, i32 1 %244 = extractelement <4 x float> %241, i32 2 %245 = extractelement <4 x float> %241, i32 3 %246 = call float @fabs(float %244) %247 = fdiv float 1.000000e+00, %246 %248 = fmul float %242, %247 %249 = fadd float %248, 1.500000e+00 %250 = fmul float %243, %247 %251 = fadd float %250, 1.500000e+00 %252 = bitcast float %251 to i32 %253 = bitcast float %249 to i32 %254 = bitcast float %245 to i32 %255 = insertelement <4 x i32> undef, i32 %252, i32 0 %256 = insertelement <4 x i32> %255, i32 %253, i32 1 %257 = insertelement <4 x i32> %256, i32 %254, i32 2 %258 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %257, <32 x i8> %74, <16 x i8> %77, i32 4) %259 = extractelement <4 x float> %258, i32 0 %260 = extractelement <4 x float> %258, i32 1 %261 = extractelement <4 x float> %258, i32 2 %262 = fmul float %259, %129 %263 = fmul float %260, %129 %264 = fmul float %261, %129 %265 = fmul float %262, %35 %266 = fmul float %263, %35 %267 = fmul float %264, %35 %268 = fmul float %236, %265 %269 = fmul float %236, %266 %270 = fmul float %236, %267 %271 = fmul float %268, %130 %272 = fmul float %269, %130 %273 = fmul float %270, %130 %274 = call float @llvm.AMDGPU.cndlt(float %234, float 0.000000e+00, float %271) %275 = call float @llvm.AMDGPU.cndlt(float %234, float 0.000000e+00, float %272) %276 = call float @llvm.AMDGPU.cndlt(float %234, float 0.000000e+00, float %273) %277 = bitcast float %78 to i32 %278 = bitcast float %79 to i32 %279 = insertelement <2 x i32> undef, i32 %277, i32 0 %280 = insertelement <2 x i32> %279, i32 %278, i32 1 %281 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %280, <32 x i8> %68, <16 x i8> %71, i32 2) %282 = extractelement <4 x float> %281, i32 0 %283 = extractelement <4 x float> %281, i32 1 %284 = extractelement <4 x float> %281, i32 2 %285 = fmul float %36, %282 %286 = fadd float %285, %274 %287 = fmul float %36, %283 %288 = fadd float %287, %275 %289 = fmul float %36, %284 %290 = fadd float %289, %276 %291 = fmul float %286, %37 %292 = fmul float %288, %37 %293 = fmul float %290, %37 %294 = fmul float %225, %291 %295 = fmul float %226, %292 %296 = fmul float %227, %293 %297 = fmul float %294, %30 %298 = fadd float %297, %27 %299 = fmul float %295, %30 %300 = fadd float %299, %28 %301 = fmul float %296, %30 %302 = fadd float %301, %29 %303 = call float @llvm.maxnum.f32(float %188, float 0.000000e+00) %304 = fadd float %303, 0xBEB0C6F7A0000000 %305 = call float @fabs(float %303) %306 = call float @llvm.pow.f32(float %305, float %43) %307 = call float @llvm.AMDGPU.cndlt(float %304, float 0.000000e+00, float %306) %308 = fmul float %130, %38 %309 = fmul float %308, %34 %310 = fadd float %309, %31 %311 = fmul float %308, %34 %312 = fadd float %311, %32 %313 = fmul float %308, %34 %314 = fadd float %313, %33 %315 = fmul float %150, %106 %316 = fmul float %151, %107 %317 = fadd float %316, %315 %318 = fmul float %152, %108 %319 = fadd float %317, %318 %320 = call float @llvm.AMDIL.clamp.(float %319, float 0.000000e+00, float 1.000000e+00) %321 = fadd float %320, 0xBEB0C6F7A0000000 %322 = fmul float %162, %106 %323 = fmul float %164, %107 %324 = fadd float %323, %322 %325 = fmul float %166, %108 %326 = fadd float %324, %325 %327 = call float @llvm.AMDIL.clamp.(float %326, float 0.000000e+00, float 1.000000e+00) %328 = fadd float %327, 0xBEB0C6F7A0000000 %329 = call float @fabs(float %327) %330 = call float @llvm.pow.f32(float %329, float %39) %331 = fadd float %39, 8.000000e+00 %332 = fmul float %331, %330 %333 = fmul float %332, 0x3FA45F3060000000 %334 = fmul float %298, %320 %335 = fmul float %300, %320 %336 = fmul float %302, %320 %337 = call float @llvm.AMDGPU.cndlt(float %321, float 0.000000e+00, float %334) %338 = call float @llvm.AMDGPU.cndlt(float %321, float 0.000000e+00, float %335) %339 = call float @llvm.AMDGPU.cndlt(float %321, float 0.000000e+00, float %336) %340 = fmul float %310, %333 %341 = fmul float %312, %333 %342 = fmul float %314, %333 %343 = call float @llvm.AMDGPU.cndlt(float %328, float 0.000000e+00, float %340) %344 = call float @llvm.AMDGPU.cndlt(float %328, float 0.000000e+00, float %341) %345 = call float @llvm.AMDGPU.cndlt(float %328, float 0.000000e+00, float %342) %346 = fadd float %337, %343 %347 = fadd float %338, %344 %348 = fadd float %339, %345 %349 = fmul float %307, %346 %350 = fmul float %307, %347 %351 = fmul float %307, %348 %352 = fmul float %temp32.0, %349 %353 = fmul float %temp33.0, %350 %354 = fmul float %temp34.0, %351 %355 = fmul float %352, %40 %356 = fmul float %353, %41 %357 = fmul float %354, %42 %358 = fmul float %187, %355 %359 = fmul float %187, %356 %360 = fmul float %187, %357 %361 = fsub float -0.000000e+00, %192 %362 = call float @llvm.AMDGPU.cndlt(float %361, float %358, float 0.000000e+00) %363 = fsub float -0.000000e+00, %192 %364 = call float @llvm.AMDGPU.cndlt(float %363, float %359, float 0.000000e+00) %365 = fsub float -0.000000e+00, %192 %366 = call float @llvm.AMDGPU.cndlt(float %365, float %360, float 0.000000e+00) %367 = call i32 @llvm.SI.packf16(float %362, float %364) %368 = bitcast i32 %367 to float %369 = call i32 @llvm.SI.packf16(float %366, float 0.000000e+00) %370 = bitcast i32 %369 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %368, float %370, float %368, float %370) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v18, v0, 0, 1, [m0] ; C8480400 v_interp_p2_f32 v18, [v18], v1, 0, 1, [m0] ; C8490401 v_interp_p1_f32 v19, v0, 1, 1, [m0] ; C84C0500 v_interp_p2_f32 v19, [v19], v1, 1, 1, [m0] ; C84D0501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[20:23], 0x24 ; C2041524 s_buffer_load_dword s9, s[20:23], 0x25 ; C2049525 s_buffer_load_dword s10, s[20:23], 0x26 ; C2051526 s_buffer_load_dword s11, s[20:23], 0x28 ; C2059528 v_interp_p1_f32 v21, v0, 2, 1, [m0] ; C8540600 v_interp_p2_f32 v21, [v21], v1, 2, 1, [m0] ; C8550601 v_interp_p1_f32 v4, v0, 0, 2, [m0] ; C8100800 v_interp_p2_f32 v4, [v4], v1, 0, 2, [m0] ; C8110801 v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 v_interp_p1_f32 v6, v0, 2, 2, [m0] ; C8180A00 v_interp_p2_f32 v6, [v6], v1, 2, 2, [m0] ; C8190A01 v_interp_p1_f32 v20, v0, 0, 3, [m0] ; C8500C00 v_interp_p2_f32 v20, [v20], v1, 0, 3, [m0] ; C8510C01 v_interp_p1_f32 v22, v0, 1, 3, [m0] ; C8580D00 v_interp_p2_f32 v22, [v22], v1, 1, 3, [m0] ; C8590D01 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 v_interp_p1_f32 v23, v0, 2, 3, [m0] ; C85C0E00 v_interp_p2_f32 v23, [v23], v1, 2, 3, [m0] ; C85D0E01 v_mul_f32_e32 v7, v4, v4 ; 100E0904 v_mad_f32 v7, v5, v5, v7 ; D2820007 041E0B05 v_mad_f32 v11, v6, v6, v7 ; D282000B 041E0D06 s_buffer_load_dword s24, s[20:23], 0x29 ; C20C1529 v_rsq_clamp_f32_e64 v12, |v11| ; D358010C 0000010B s_load_dwordx4 s[28:31], s[4:5], 0x8 ; C08E0508 s_load_dwordx8 s[32:39], s[6:7], 0x10 ; C0D00710 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030702 v_min_f32_e32 v12, 0x7f7fffff, v12 ; 1E1818FF 7F7FFFFF v_mul_f32_e32 v4, v4, v12 ; 10081904 v_mul_f32_e32 v4, s8, v4 ; 10080808 v_mul_f32_e32 v5, v5, v12 ; 100A1905 v_mad_f32 v4, -s9, v5, -v4 ; D2820004 A4120A09 v_mul_f32_e32 v5, v6, v12 ; 100A1906 v_mad_f32 v4, -s10, v5, v4 ; D2820004 24120A0A v_subrev_f32_e32 v4, s11, v4 ; 0A08080B v_mul_f32_e32 v4, s24, v4 ; 10080818 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_sub_f32_e32 v6, 1.0, v11 ; 080C16F2 v_mul_f32_e32 v5, v6, v4 ; 100A0906 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v11, 0, -1.0, vcc ; D200000B 01A9E680 v_cndmask_b32_e64 v11, v11, -1.0, vcc ; D200000B 01A9E70B v_cndmask_b32_e64 v11, v11, -1.0, vcc ; D200000B 01A9E70B v_cndmask_b32_e64 v12, v11, -1.0, vcc ; D200000C 01A9E70B s_movk_i32 s0, 0xf00 ; B0000F00 s_buffer_load_dword s24, s[20:23], s0 ; C20C1400 v_mul_f32_e32 v11, v20, v20 ; 10162914 v_mad_f32 v11, v22, v22, v11 ; D282000B 042E2D16 v_mad_f32 v11, v23, v23, v11 ; D282000B 042E2F17 v_rsq_clamp_f32_e32 v25, v11 ; 7E32590B v_mul_f32_e32 v11, v18, v18 ; 10162512 v_mad_f32 v11, v19, v19, v11 ; D282000B 042E2713 v_mad_f32 v11, v21, v21, v11 ; D282000B 042E2B15 v_rsq_clamp_f32_e32 v27, v11 ; 7E36590B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v11, 2.0, v7, -1.0 ; D282000B 03CE0EF4 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[28:31] ; F0800F00 00E80E02 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v26, v14, v11, 0 ; D282001A 0202170E v_mad_f32 v11, 2.0, v8, -1.0 ; D282000B 03CE10F4 v_mad_f32 v28, v14, v11, 0 ; D282001C 0202170E v_mad_f32 v11, 2.0, v9, -2.0 ; D282000B 03D612F4 v_mad_f32 v29, v14, v11, 1.0 ; D282001D 03CA170E s_buffer_load_dword s44, s[20:23], 0x0 ; C2161500 s_buffer_load_dword s45, s[20:23], 0x1 ; C2169501 s_buffer_load_dword s14, s[20:23], 0x2 ; C2071502 s_buffer_load_dword s11, s[20:23], 0x10 ; C2059510 s_buffer_load_dword s12, s[20:23], 0x11 ; C2061511 s_buffer_load_dword s13, s[20:23], 0x12 ; C2069512 s_buffer_load_dword s25, s[20:23], 0x13 ; C20C9513 s_buffer_load_dword s8, s[20:23], 0x14 ; C2041514 s_buffer_load_dword s9, s[20:23], 0x15 ; C2049515 s_buffer_load_dword s10, s[20:23], 0x16 ; C2051516 s_buffer_load_dword s26, s[20:23], 0x17 ; C20D1517 s_buffer_load_dword s19, s[20:23], 0x18 ; C2099518 s_buffer_load_dword s17, s[20:23], 0x19 ; C2089519 s_buffer_load_dword s16, s[20:23], 0x1a ; C208151A s_buffer_load_dword s18, s[20:23], 0x1b ; C209151B s_buffer_load_dword s3, s[20:23], 0x1c ; C201951C s_buffer_load_dword s0, s[20:23], 0x20 ; C2001520 s_buffer_load_dword s1, s[20:23], 0x21 ; C2009521 s_buffer_load_dword s2, s[20:23], 0x22 ; C2011522 s_buffer_load_dword s15, s[20:23], 0x23 ; C2079523 v_mul_f32_e32 v11, v26, v26 ; 1016351A v_mad_f32 v11, v28, v28, v11 ; D282000B 042E391C v_mad_f32 v11, v29, v29, v11 ; D282000B 042E3B1D v_rsq_clamp_f32_e32 v30, v11 ; 7E3C590B v_mov_b32_e32 v11, 1.0 ; 7E1602F2 v_cmpx_le_f32_e32 vcc, 0, v12 ; 7C261880 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mov_b32_e32 v12, 1.0 ; 7E1802F2 v_mov_b32_e32 v13, 1.0 ; 7E1A02F2 v_cmp_ne_i32_e64 s[28:29], 0, s24 ; D10A001C 00003080 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s25 ; 7E220219 v_mov_b32_e32 v16, s26 ; 7E20021A v_mov_b32_e32 v24, v10 ; 7E30030A s_and_saveexec_b64 s[24:25], s[28:29] ; BE98241C s_xor_b64 s[24:25], exec, s[24:25] ; 8998187E s_cbranch_execz BB0_2 ; BF880000 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 v_interp_p1_f32 v10, v0, 3, 4, [m0] ; C8281300 s_buffer_load_dword s26, s[20:23], 0x7 ; C20D1507 s_buffer_load_dword s27, s[20:23], 0x4 ; C20D9504 s_buffer_load_dword s40, s[20:23], 0x6 ; C2141506 s_buffer_load_dword s41, s[20:23], 0x5 ; C2149505 v_interp_p2_f32 v10, [v10], v1, 3, 4, [m0] ; C8291301 v_interp_p1_f32 v11, v0, 1, 4, [m0] ; C82C1100 v_rcp_f32_e32 v24, v10 ; 7E30550A v_interp_p2_f32 v11, [v11], v1, 1, 4, [m0] ; C82D1101 v_interp_p1_f32 v0, v0, 0, 4, [m0] ; C8001000 v_interp_p2_f32 v0, [v0], v1, 0, 4, [m0] ; C8011001 v_mul_f32_e32 v0, v0, v24 ; 10003100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s26 ; 7E02021A v_mad_f32 v0, s27, v0, v1 ; D2820000 0406001B v_mul_f32_e32 v1, v11, v24 ; 1002310B v_mov_b32_e32 v10, s40 ; 7E140228 v_mad_f32 v1, s41, v1, v10 ; D2820001 042A0229 image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[28:35], s[36:39] ; F0800700 01270B00 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[24:25] ; 88FE187E v_min_f32_e32 v0, 0x7f7fffff, v25 ; 1E0032FF 7F7FFFFF v_min_f32_e32 v1, 0x7f7fffff, v27 ; 1E0236FF 7F7FFFFF v_mul_f32_e32 v10, v1, v18 ; 10142501 v_mul_f32_e32 v18, v1, v19 ; 10242701 v_mul_f32_e32 v1, v1, v21 ; 10022B01 v_min_f32_e32 v19, 0x7f7fffff, v30 ; 1E263CFF 7F7FFFFF v_mul_f32_e32 v24, v19, v26 ; 10303513 v_mul_f32_e32 v25, v19, v28 ; 10323913 v_mul_f32_e32 v19, v19, v29 ; 10263B13 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510 s_load_dwordx8 s[24:31], s[6:7], 0x18 ; C0CC0718 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 v_sub_f32_e64 v26, 1.0, s44 ; D208001A 000058F2 v_sub_f32_e64 v27, 1.0, s45 ; D208001B 00005AF2 v_mul_f32_e32 v28, v0, v20 ; 10382900 v_mul_f32_e32 v29, v0, v22 ; 103A2D00 v_mul_f32_e32 v21, v28, v24 ; 102A311C v_mad_f32 v21, v25, v29, v21 ; D2820015 04563B19 v_mul_f32_e32 v30, v0, v23 ; 103C2F00 v_mad_f32 v31, v19, v30, v21 ; D282001F 04563D13 v_mul_f32_e32 v21, v24, v31 ; 102A3F18 v_mad_f32 v21, v31, v24, v21 ; D2820015 0456311F v_mad_f32 v21, -v20, v0, v21 ; D2820015 24560114 v_mul_f32_e32 v20, v25, v31 ; 10283F19 v_mad_f32 v20, v31, v25, v20 ; D2820014 0452331F v_mad_f32 v22, -v22, v0, v20 ; D2820016 24520116 v_mul_f32_e32 v20, v19, v31 ; 10283F13 v_mad_f32 v20, v31, v19, v20 ; D2820014 0452271F v_mad_f32 v23, -v23, v0, v20 ; D2820017 24520117 v_cubeid_f32 v34, v21, v22, v23 ; D2880022 045E2D15 v_cubema_f32 v33, v21, v22, v23 ; D28E0021 045E2D15 v_cubesc_f32 v32, v21, v22, v23 ; D28A0020 045E2D15 v_cubetc_f32 v31, v21, v22, v23 ; D28C001F 045E2D15 v_rcp_f32_e64 v0, |v33| ; D3540100 00000121 v_mov_b32_e32 v20, 0x3fc00000 ; 7E2802FF 3FC00000 v_mad_f32 v33, v31, v0, v20 ; D2820021 0452011F v_mad_f32 v32, v32, v0, v20 ; D2820020 04520120 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[36:43], s[32:35] ; F0800700 01091F20 v_mul_f32_e32 v0, v10, v21 ; 10002B0A v_mad_f32 v0, v22, v18, v0 ; D2820000 04022516 v_mad_f32 v0, v23, v1, v0 ; D2820000 04020317 v_mad_f32 v7, 2.0, v7, -1.0 ; D2820007 03CE0EF4 v_mad_f32 v8, 2.0, v8, -1.0 ; D2820008 03CE10F4 v_mad_f32 v9, 2.0, v9, -1.0 ; D2820009 03CE12F4 v_mul_f32_e32 v7, v7, v28 ; 100E3907 v_mad_f32 v7, v29, v8, v7 ; D2820007 041E111D v_mad_f32 v7, v30, v9, v7 ; D2820007 041E131E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v14, v31 ; 10103F0E v_mul_f32_e32 v9, v14, v32 ; 1012410E v_mul_f32_e32 v14, v14, v33 ; 101C430E v_mul_f32_e32 v8, s19, v8 ; 10101013 v_mul_f32_e32 v9, s19, v9 ; 10121213 v_mul_f32_e32 v14, s19, v14 ; 101C1C13 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_mul_f32_e32 v20, v7, v7 ; 10280F07 v_mul_f32_e32 v20, v7, v20 ; 10282907 v_mul_f32_e32 v8, v8, v20 ; 10102908 v_mul_f32_e32 v9, v9, v20 ; 10122909 v_mul_f32_e32 v14, v14, v20 ; 101C290E v_mov_b32_e32 v20, 0xb58637bd ; 7E2802FF B58637BD v_add_f32_e32 v7, v20, v7 ; 060E0F14 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_mul_f32_e32 v7, v15, v8 ; 100E110F v_mul_f32_e32 v8, v15, v9 ; 1010130F v_mul_f32_e32 v9, v15, v14 ; 10121D0F v_cndmask_b32_e64 v7, v7, 0, vcc ; D2000007 01A90107 v_cndmask_b32_e64 v8, v8, 0, vcc ; D2000008 01A90108 v_cndmask_b32_e64 v9, v9, 0, vcc ; D2000009 01A90109 image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800700 00A61502 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, s17, v21, v7 ; D2820002 041E2A11 v_mad_f32 v3, s17, v22, v8 ; D2820003 04222C11 v_mad_f32 v7, s17, v23, v9 ; D2820007 04262E11 v_mul_f32_e32 v8, s18, v15 ; 10101E12 v_mul_f32_e32 v2, s16, v2 ; 10040410 v_mul_f32_e32 v3, s16, v3 ; 10060610 v_mul_f32_e32 v7, s16, v7 ; 100E0E10 v_mul_f32_e32 v2, v2, v26 ; 10043502 v_mul_f32_e32 v3, v3, v27 ; 10063703 v_sub_f32_e64 v9, 1.0, s14 ; D2080009 00001CF2 v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_mov_b32_e32 v9, 0x7fffffff ; 7E1202FF 7FFFFFFF v_and_b32_e32 v14, v6, v9 ; 361C1306 v_log_f32_e32 v14, v14 ; 7E1C4F0E v_mad_f32 v2, v17, v2, s11 ; D2820002 002E0511 v_mad_f32 v3, v3, v17, s12 ; D2820003 00322303 v_mad_f32 v7, v7, v17, s13 ; D2820007 00362307 v_mul_legacy_f32_e32 v14, s15, v14 ; 0E1C1C0F v_add_f32_e32 v6, v20, v6 ; 060C0D14 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v6, v14, 0, vcc ; D2000006 01A9010E v_mad_f32 v14, v16, v8, s8 ; D282000E 00221110 v_mad_f32 v15, v8, v16, s9 ; D282000F 00262108 v_mad_f32 v8, v8, v16, s10 ; D2820008 002A2108 v_mul_f32_e32 v10, v10, v24 ; 1014310A v_mad_f32 v10, v25, v18, v10 ; D282000A 042A2519 v_mad_f32 v1, v19, v1, v10 ; D2820001 042A0313 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v7, v1, v7 ; 100E0F01 v_add_f32_e32 v1, v20, v1 ; 06020314 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v9, v0, v9 ; 36121300 v_log_f32_e32 v9, v9 ; 7E124F09 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v2, 0, vcc ; D2000001 01A90102 v_cndmask_b32_e64 v2, v3, 0, vcc ; D2000002 01A90103 v_cndmask_b32_e64 v3, v7, 0, vcc ; D2000003 01A90107 v_add_f32_e32 v0, v20, v0 ; 06000114 v_mul_legacy_f32_e32 v7, s3, v9 ; 0E0E1203 v_mov_b32_e32 v9, 0x41000000 ; 7E1202FF 41000000 v_add_f32_e32 v9, s3, v9 ; 06121203 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mul_f32_e32 v7, 0x3d22f983, v7 ; 100E0EFF 3D22F983 v_mul_f32_e32 v9, v7, v14 ; 10121D07 v_mul_f32_e32 v10, v7, v15 ; 10141F07 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v9, 0, vcc ; D2000000 01A90109 v_cndmask_b32_e64 v8, v10, 0, vcc ; D2000008 01A9010A v_cndmask_b32_e64 v7, v7, 0, vcc ; D2000007 01A90107 v_add_f32_e32 v0, v0, v1 ; 06000300 v_add_f32_e32 v1, v8, v2 ; 06020508 v_add_f32_e32 v2, v7, v3 ; 06040707 v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mul_f32_e32 v2, v2, v6 ; 10040D02 v_mul_f32_e32 v0, v0, v11 ; 10001700 v_mul_f32_e32 v1, v1, v12 ; 10021901 v_mul_f32_e32 v2, v2, v13 ; 10041B02 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_mul_f32_e32 v1, s1, v1 ; 10020201 v_mul_f32_e32 v2, s2, v2 ; 10040402 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_xor_b32_e32 v3, 0x80000000, v5 ; 3A060AFF 80000000 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 0, v0, vcc ; D2000000 01AA0080 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cndmask_b32_e64 v1, 0, v2, vcc ; D2000001 01AA0480 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 36 Code Size: 1548 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1].xyz, COLOR DCL OUT[2], COLOR[1] DCL OUT[3].xy, TEXCOORD[0] DCL OUT[4], TEXCOORD[4] DCL OUT[5], TEXCOORD[5] DCL OUT[6], TEXCOORD[6] DCL OUT[7].xyz, TEXCOORD[7] DCL CONST[0..255] DCL TEMP[0..8] DCL ADDR[0] IMM[0] FLT32 { 3.0000, 0.0078, -1.0000, 1.0000} IMM[1] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[2], IMM[0].yyyy, IMM[0].zzzz 1: MUL OUT[2].w, TEMP[0].wwww, CONST[235].wwww 2: MUL TEMP[1], IMM[0].xxxx, IN[3] 3: MOV TEMP[2], TEMP[1].yxzw 4: ARR ADDR[0], TEMP[2] 5: MUL TEMP[3], IN[4].yyyy, CONST[ADDR[0].x+7] 6: ARR ADDR[0], TEMP[2] 7: MAD TEMP[3], IN[4].xxxx, CONST[ADDR[0].y+7], TEMP[3] 8: ARR ADDR[0], TEMP[2] 9: MAD TEMP[1], IN[4].zzzz, CONST[ADDR[0].z+7], TEMP[3] 10: ARR ADDR[0], TEMP[2] 11: MAD TEMP[1], IN[4].wwww, CONST[ADDR[0].w+7], TEMP[1] 12: MOV TEMP[3].xyz, CONST[239] 13: MAD TEMP[3].xyz, IN[0], TEMP[3], CONST[238] 14: MOV TEMP[3].w, IMM[0].wwww 15: DP4 TEMP[1].w, TEMP[3], TEMP[1] 16: MUL TEMP[4], TEMP[1].wwww, CONST[232] 17: ARR ADDR[0], TEMP[2] 18: MUL TEMP[5], IN[4].yyyy, CONST[ADDR[0].x+6] 19: ARR ADDR[0], TEMP[2] 20: MUL TEMP[6], IN[4].yyyy, CONST[ADDR[0].x+8] 21: ARR ADDR[0], TEMP[2] 22: MAD TEMP[6], IN[4].xxxx, CONST[ADDR[0].y+8], TEMP[6] 23: ARR ADDR[0], TEMP[2] 24: MAD TEMP[5], IN[4].xxxx, CONST[ADDR[0].y+6], TEMP[5] 25: ARR ADDR[0], TEMP[2] 26: MAD TEMP[5], IN[4].zzzz, CONST[ADDR[0].z+6], TEMP[5] 27: ARR ADDR[0], TEMP[2] 28: MAD TEMP[6], IN[4].zzzz, CONST[ADDR[0].z+8], TEMP[6] 29: ARR ADDR[0], TEMP[2] 30: MAD TEMP[6], IN[4].wwww, CONST[ADDR[0].w+8], TEMP[6] 31: ARR ADDR[0], TEMP[2] 32: MAD TEMP[5], IN[4].wwww, CONST[ADDR[0].w+6], TEMP[5] 33: DP4 TEMP[1].w, TEMP[3], TEMP[5] 34: DP4 TEMP[3].x, TEMP[3], TEMP[6] 35: MAD TEMP[4], CONST[231], TEMP[1].wwww, TEMP[4] 36: MAD TEMP[3], CONST[233], TEMP[3].xxxx, TEMP[4] 37: ADD TEMP[3], TEMP[3], CONST[234] 38: MAD TEMP[4].xyz, TEMP[3], -CONST[4].wwww, CONST[4] 39: MUL TEMP[7].xyz, TEMP[4].yyyy, CONST[236] 40: MAD TEMP[4].xyw, CONST[235].xyzz, TEMP[4].xxxx, TEMP[7].xyzz 41: MAD TEMP[4].xyz, CONST[237], TEMP[4].zzzz, TEMP[4].xyww 42: MAD TEMP[7].xyz, IN[1], IMM[0].yyyy, IMM[0].zzzz 43: DP3 TEMP[8].x, TEMP[7], TEMP[5] 44: DP3 TEMP[5].x, TEMP[0], TEMP[5] 45: DP3 TEMP[8].y, TEMP[7], TEMP[1] 46: DP3 TEMP[5].y, TEMP[0], TEMP[1] 47: DP3 TEMP[8].z, TEMP[7], TEMP[6] 48: DP3 TEMP[5].z, TEMP[0], TEMP[6] 49: DP3 OUT[6].x, TEMP[8], TEMP[4] 50: MUL TEMP[0].xyz, TEMP[8].yzxw, TEMP[5].zxyw 51: MAD TEMP[0].xyz, TEMP[5].yzxw, TEMP[8].zxyw, -TEMP[0] 52: MUL TEMP[0].xyz, TEMP[0].wwww, TEMP[0] 53: DP3 OUT[6].y, TEMP[0], TEMP[4] 54: DP3 OUT[6].z, TEMP[5], TEMP[4] 55: MOV TEMP[1].xyz, CONST[240] 56: MUL TEMP[4].xyz, TEMP[1].yyyy, CONST[236] 57: MAD TEMP[1].xyw, CONST[235].xyzz, TEMP[1].xxxx, TEMP[4].xyzz 58: MAD TEMP[1].xyz, CONST[237], TEMP[1].zzzz, TEMP[1].xyww 59: DP3 OUT[4].x, TEMP[8], TEMP[1] 60: DP3 OUT[4].y, TEMP[0], TEMP[1] 61: DP3 OUT[4].z, TEMP[5], TEMP[1] 62: DP3 OUT[7].x, TEMP[8], CONST[237] 63: MUL TEMP[1].xy, TEMP[8].yyyy, CONST[232].xzzw 64: MAD TEMP[1].xy, CONST[231].xzzw, TEMP[8].xxxx, TEMP[1] 65: MAD TEMP[1].xy, CONST[233].xzzw, TEMP[8].zzzz, TEMP[1] 66: DP3 OUT[7].y, TEMP[0], CONST[237] 67: DP3 OUT[7].z, TEMP[5], CONST[237] 68: MUL TEMP[4].xy, TEMP[5].yyyy, CONST[232].xzzw 69: MAD TEMP[4].xy, CONST[231].xzzw, TEMP[5].xxxx, TEMP[4] 70: MAD TEMP[4].xy, CONST[233].xzzw, TEMP[5].zzzz, TEMP[4] 71: MUL TEMP[0].yw, TEMP[0].yyyy, CONST[232].xxzz 72: MAD TEMP[0].xy, CONST[231].xzzw, TEMP[0].xxxx, TEMP[0].ywzw 73: MAD TEMP[0].xy, CONST[233].xzzw, TEMP[0].zzzz, TEMP[0] 74: MOV TEMP[1].z, TEMP[0].xxxx 75: MOV OUT[2].y, TEMP[0].yyyy 76: MOV TEMP[1].w, TEMP[4].xxxx 77: MOV OUT[2].z, TEMP[4].yyyy 78: MOV OUT[1].xyz, TEMP[1].xzww 79: MOV OUT[2].x, TEMP[1].yyyy 80: MOV OUT[3].xy, IN[5] 81: MOV OUT[4].w, IMM[1].xxxx 82: MUL TEMP[0], TEMP[3].yyyy, CONST[1] 83: MAD TEMP[0], CONST[0], TEMP[3].xxxx, TEMP[0] 84: MAD TEMP[0], CONST[2], TEMP[3].zzzz, TEMP[0] 85: MAD TEMP[0], CONST[3], TEMP[3].wwww, TEMP[0] 86: MOV OUT[5], TEMP[0] 87: MOV OUT[0], TEMP[0] 88: MOV OUT[6].w, IMM[0].wwww 89: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3696) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3700) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3704) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3708) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3712) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3716) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3720) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3724) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3728) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3732) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3736) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3740) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3744) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3748) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3752) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3756) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3760) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3764) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3768) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3772) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3776) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3780) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3784) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3792) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3796) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3800) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3808) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3812) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3816) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3824) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3828) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3832) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3840) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3844) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3848) %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = add i32 %5, %7 %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %70) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = extractelement <4 x float> %71, i32 2 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = add i32 %5, %7 %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %77) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %5, %7 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %7 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = extractelement <4 x float> %93, i32 3 %98 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !tbaa !0 %100 = add i32 %5, %7 %101 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %99, i32 0, i32 %100) %102 = extractelement <4 x float> %101, i32 0 %103 = extractelement <4 x float> %101, i32 1 %104 = extractelement <4 x float> %101, i32 2 %105 = extractelement <4 x float> %101, i32 3 %106 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !tbaa !0 %108 = add i32 %5, %7 %109 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %107, i32 0, i32 %108) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = fmul float %86, 0x3F80101020000000 %113 = fadd float %112, -1.000000e+00 %114 = fmul float %87, 0x3F80101020000000 %115 = fadd float %114, -1.000000e+00 %116 = fmul float %88, 0x3F80101020000000 %117 = fadd float %116, -1.000000e+00 %118 = fmul float %89, 0x3F80101020000000 %119 = fadd float %118, -1.000000e+00 %120 = fmul float %119, %52 %121 = fmul float %94, 3.000000e+00 %122 = fmul float %95, 3.000000e+00 %123 = fmul float %96, 3.000000e+00 %124 = fmul float %97, 3.000000e+00 %125 = call float @llvm.AMDIL.round.nearest.(float %122) %126 = fptosi float %125 to i32 %127 = call float @llvm.AMDIL.round.nearest.(float %121) %128 = call float @llvm.AMDIL.round.nearest.(float %123) %129 = call float @llvm.AMDIL.round.nearest.(float %124) %130 = shl i32 %126, 4 %131 = add i32 %130, 112 %132 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %131) %133 = fmul float %103, %132 %134 = shl i32 %126, 4 %135 = add i32 %134, 116 %136 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %135) %137 = fmul float %103, %136 %138 = shl i32 %126, 4 %139 = add i32 %138, 120 %140 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %139) %141 = fmul float %103, %140 %142 = shl i32 %126, 4 %143 = add i32 %142, 124 %144 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %143) %145 = fmul float %103, %144 %146 = call float @llvm.AMDIL.round.nearest.(float %122) %147 = call float @llvm.AMDIL.round.nearest.(float %121) %148 = fptosi float %147 to i32 %149 = call float @llvm.AMDIL.round.nearest.(float %123) %150 = call float @llvm.AMDIL.round.nearest.(float %124) %151 = shl i32 %148, 4 %152 = add i32 %151, 112 %153 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %152) %154 = fmul float %102, %153 %155 = fadd float %154, %133 %156 = shl i32 %148, 4 %157 = add i32 %156, 116 %158 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %157) %159 = fmul float %102, %158 %160 = fadd float %159, %137 %161 = shl i32 %148, 4 %162 = add i32 %161, 120 %163 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %162) %164 = fmul float %102, %163 %165 = fadd float %164, %141 %166 = shl i32 %148, 4 %167 = add i32 %166, 124 %168 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %167) %169 = fmul float %102, %168 %170 = fadd float %169, %145 %171 = call float @llvm.AMDIL.round.nearest.(float %122) %172 = call float @llvm.AMDIL.round.nearest.(float %121) %173 = call float @llvm.AMDIL.round.nearest.(float %123) %174 = fptosi float %173 to i32 %175 = call float @llvm.AMDIL.round.nearest.(float %124) %176 = shl i32 %174, 4 %177 = add i32 %176, 112 %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %177) %179 = fmul float %104, %178 %180 = fadd float %179, %155 %181 = shl i32 %174, 4 %182 = add i32 %181, 116 %183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %182) %184 = fmul float %104, %183 %185 = fadd float %184, %160 %186 = shl i32 %174, 4 %187 = add i32 %186, 120 %188 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %187) %189 = fmul float %104, %188 %190 = fadd float %189, %165 %191 = shl i32 %174, 4 %192 = add i32 %191, 124 %193 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %192) %194 = fmul float %104, %193 %195 = fadd float %194, %170 %196 = call float @llvm.AMDIL.round.nearest.(float %122) %197 = call float @llvm.AMDIL.round.nearest.(float %121) %198 = call float @llvm.AMDIL.round.nearest.(float %123) %199 = call float @llvm.AMDIL.round.nearest.(float %124) %200 = fptosi float %199 to i32 %201 = shl i32 %200, 4 %202 = add i32 %201, 112 %203 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %202) %204 = fmul float %105, %203 %205 = fadd float %204, %180 %206 = shl i32 %200, 4 %207 = add i32 %206, 116 %208 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %207) %209 = fmul float %105, %208 %210 = fadd float %209, %185 %211 = shl i32 %200, 4 %212 = add i32 %211, 120 %213 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %212) %214 = fmul float %105, %213 %215 = fadd float %214, %190 %216 = shl i32 %200, 4 %217 = add i32 %216, 124 %218 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %217) %219 = fmul float %105, %218 %220 = fadd float %219, %195 %221 = fmul float %72, %62 %222 = fadd float %221, %59 %223 = fmul float %73, %63 %224 = fadd float %223, %60 %225 = fmul float %74, %64 %226 = fadd float %225, %61 %227 = fmul float %222, %205 %228 = fmul float %224, %210 %229 = fadd float %227, %228 %230 = fmul float %226, %215 %231 = fadd float %229, %230 %232 = fadd float %231, %220 %233 = fmul float %232, %37 %234 = fmul float %232, %38 %235 = fmul float %232, %39 %236 = fmul float %232, %40 %237 = call float @llvm.AMDIL.round.nearest.(float %122) %238 = fptosi float %237 to i32 %239 = call float @llvm.AMDIL.round.nearest.(float %121) %240 = call float @llvm.AMDIL.round.nearest.(float %123) %241 = call float @llvm.AMDIL.round.nearest.(float %124) %242 = shl i32 %238, 4 %243 = add i32 %242, 96 %244 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %243) %245 = fmul float %103, %244 %246 = shl i32 %238, 4 %247 = add i32 %246, 100 %248 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %247) %249 = fmul float %103, %248 %250 = shl i32 %238, 4 %251 = add i32 %250, 104 %252 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %251) %253 = fmul float %103, %252 %254 = shl i32 %238, 4 %255 = add i32 %254, 108 %256 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %255) %257 = fmul float %103, %256 %258 = call float @llvm.AMDIL.round.nearest.(float %122) %259 = fptosi float %258 to i32 %260 = call float @llvm.AMDIL.round.nearest.(float %121) %261 = call float @llvm.AMDIL.round.nearest.(float %123) %262 = call float @llvm.AMDIL.round.nearest.(float %124) %263 = shl i32 %259, 4 %264 = add i32 %263, 128 %265 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %264) %266 = fmul float %103, %265 %267 = shl i32 %259, 4 %268 = add i32 %267, 132 %269 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %268) %270 = fmul float %103, %269 %271 = shl i32 %259, 4 %272 = add i32 %271, 136 %273 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %272) %274 = fmul float %103, %273 %275 = shl i32 %259, 4 %276 = add i32 %275, 140 %277 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %276) %278 = fmul float %103, %277 %279 = call float @llvm.AMDIL.round.nearest.(float %122) %280 = call float @llvm.AMDIL.round.nearest.(float %121) %281 = fptosi float %280 to i32 %282 = call float @llvm.AMDIL.round.nearest.(float %123) %283 = call float @llvm.AMDIL.round.nearest.(float %124) %284 = shl i32 %281, 4 %285 = add i32 %284, 128 %286 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %285) %287 = fmul float %102, %286 %288 = fadd float %287, %266 %289 = shl i32 %281, 4 %290 = add i32 %289, 132 %291 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %290) %292 = fmul float %102, %291 %293 = fadd float %292, %270 %294 = shl i32 %281, 4 %295 = add i32 %294, 136 %296 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %295) %297 = fmul float %102, %296 %298 = fadd float %297, %274 %299 = shl i32 %281, 4 %300 = add i32 %299, 140 %301 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %300) %302 = fmul float %102, %301 %303 = fadd float %302, %278 %304 = call float @llvm.AMDIL.round.nearest.(float %122) %305 = call float @llvm.AMDIL.round.nearest.(float %121) %306 = fptosi float %305 to i32 %307 = call float @llvm.AMDIL.round.nearest.(float %123) %308 = call float @llvm.AMDIL.round.nearest.(float %124) %309 = shl i32 %306, 4 %310 = add i32 %309, 96 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = fmul float %102, %311 %313 = fadd float %312, %245 %314 = shl i32 %306, 4 %315 = add i32 %314, 100 %316 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %315) %317 = fmul float %102, %316 %318 = fadd float %317, %249 %319 = shl i32 %306, 4 %320 = add i32 %319, 104 %321 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %320) %322 = fmul float %102, %321 %323 = fadd float %322, %253 %324 = shl i32 %306, 4 %325 = add i32 %324, 108 %326 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %325) %327 = fmul float %102, %326 %328 = fadd float %327, %257 %329 = call float @llvm.AMDIL.round.nearest.(float %122) %330 = call float @llvm.AMDIL.round.nearest.(float %121) %331 = call float @llvm.AMDIL.round.nearest.(float %123) %332 = fptosi float %331 to i32 %333 = call float @llvm.AMDIL.round.nearest.(float %124) %334 = shl i32 %332, 4 %335 = add i32 %334, 96 %336 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %335) %337 = fmul float %104, %336 %338 = fadd float %337, %313 %339 = shl i32 %332, 4 %340 = add i32 %339, 100 %341 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %340) %342 = fmul float %104, %341 %343 = fadd float %342, %318 %344 = shl i32 %332, 4 %345 = add i32 %344, 104 %346 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %345) %347 = fmul float %104, %346 %348 = fadd float %347, %323 %349 = shl i32 %332, 4 %350 = add i32 %349, 108 %351 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %350) %352 = fmul float %104, %351 %353 = fadd float %352, %328 %354 = call float @llvm.AMDIL.round.nearest.(float %122) %355 = call float @llvm.AMDIL.round.nearest.(float %121) %356 = call float @llvm.AMDIL.round.nearest.(float %123) %357 = fptosi float %356 to i32 %358 = call float @llvm.AMDIL.round.nearest.(float %124) %359 = shl i32 %357, 4 %360 = add i32 %359, 128 %361 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %360) %362 = fmul float %104, %361 %363 = fadd float %362, %288 %364 = shl i32 %357, 4 %365 = add i32 %364, 132 %366 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %365) %367 = fmul float %104, %366 %368 = fadd float %367, %293 %369 = shl i32 %357, 4 %370 = add i32 %369, 136 %371 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %370) %372 = fmul float %104, %371 %373 = fadd float %372, %298 %374 = shl i32 %357, 4 %375 = add i32 %374, 140 %376 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %375) %377 = fmul float %104, %376 %378 = fadd float %377, %303 %379 = call float @llvm.AMDIL.round.nearest.(float %122) %380 = call float @llvm.AMDIL.round.nearest.(float %121) %381 = call float @llvm.AMDIL.round.nearest.(float %123) %382 = call float @llvm.AMDIL.round.nearest.(float %124) %383 = fptosi float %382 to i32 %384 = shl i32 %383, 4 %385 = add i32 %384, 128 %386 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %385) %387 = fmul float %105, %386 %388 = fadd float %387, %363 %389 = shl i32 %383, 4 %390 = add i32 %389, 132 %391 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %390) %392 = fmul float %105, %391 %393 = fadd float %392, %368 %394 = shl i32 %383, 4 %395 = add i32 %394, 136 %396 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %395) %397 = fmul float %105, %396 %398 = fadd float %397, %373 %399 = shl i32 %383, 4 %400 = add i32 %399, 140 %401 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %400) %402 = fmul float %105, %401 %403 = fadd float %402, %378 %404 = call float @llvm.AMDIL.round.nearest.(float %122) %405 = call float @llvm.AMDIL.round.nearest.(float %121) %406 = call float @llvm.AMDIL.round.nearest.(float %123) %407 = call float @llvm.AMDIL.round.nearest.(float %124) %408 = fptosi float %407 to i32 %409 = shl i32 %408, 4 %410 = add i32 %409, 96 %411 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %410) %412 = fmul float %105, %411 %413 = fadd float %412, %338 %414 = shl i32 %408, 4 %415 = add i32 %414, 100 %416 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %415) %417 = fmul float %105, %416 %418 = fadd float %417, %343 %419 = shl i32 %408, 4 %420 = add i32 %419, 104 %421 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %420) %422 = fmul float %105, %421 %423 = fadd float %422, %348 %424 = shl i32 %408, 4 %425 = add i32 %424, 108 %426 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %425) %427 = fmul float %105, %426 %428 = fadd float %427, %353 %429 = fmul float %222, %413 %430 = fmul float %224, %418 %431 = fadd float %429, %430 %432 = fmul float %226, %423 %433 = fadd float %431, %432 %434 = fadd float %433, %428 %435 = fmul float %222, %388 %436 = fmul float %224, %393 %437 = fadd float %435, %436 %438 = fmul float %226, %398 %439 = fadd float %437, %438 %440 = fadd float %439, %403 %441 = fmul float %33, %434 %442 = fadd float %441, %233 %443 = fmul float %34, %434 %444 = fadd float %443, %234 %445 = fmul float %35, %434 %446 = fadd float %445, %235 %447 = fmul float %36, %434 %448 = fadd float %447, %236 %449 = fmul float %41, %440 %450 = fadd float %449, %442 %451 = fmul float %42, %440 %452 = fadd float %451, %444 %453 = fmul float %43, %440 %454 = fadd float %453, %446 %455 = fmul float %44, %440 %456 = fadd float %455, %448 %457 = fadd float %450, %45 %458 = fadd float %452, %46 %459 = fadd float %454, %47 %460 = fadd float %456, %48 %461 = fmul float %32, %457 %462 = fsub float %29, %461 %463 = fmul float %32, %458 %464 = fsub float %30, %463 %465 = fmul float %32, %459 %466 = fsub float %31, %465 %467 = fmul float %464, %53 %468 = fmul float %464, %54 %469 = fmul float %464, %55 %470 = fmul float %49, %462 %471 = fadd float %470, %467 %472 = fmul float %50, %462 %473 = fadd float %472, %468 %474 = fmul float %51, %462 %475 = fadd float %474, %469 %476 = fmul float %56, %466 %477 = fadd float %476, %471 %478 = fmul float %57, %466 %479 = fadd float %478, %473 %480 = fmul float %58, %466 %481 = fadd float %480, %475 %482 = fmul float %79, 0x3F80101020000000 %483 = fadd float %482, -1.000000e+00 %484 = fmul float %80, 0x3F80101020000000 %485 = fadd float %484, -1.000000e+00 %486 = fmul float %81, 0x3F80101020000000 %487 = fadd float %486, -1.000000e+00 %488 = fmul float %483, %413 %489 = fmul float %485, %418 %490 = fadd float %489, %488 %491 = fmul float %487, %423 %492 = fadd float %490, %491 %493 = fmul float %113, %413 %494 = fmul float %115, %418 %495 = fadd float %494, %493 %496 = fmul float %117, %423 %497 = fadd float %495, %496 %498 = fmul float %483, %205 %499 = fmul float %485, %210 %500 = fadd float %499, %498 %501 = fmul float %487, %215 %502 = fadd float %500, %501 %503 = fmul float %113, %205 %504 = fmul float %115, %210 %505 = fadd float %504, %503 %506 = fmul float %117, %215 %507 = fadd float %505, %506 %508 = fmul float %483, %388 %509 = fmul float %485, %393 %510 = fadd float %509, %508 %511 = fmul float %487, %398 %512 = fadd float %510, %511 %513 = fmul float %113, %388 %514 = fmul float %115, %393 %515 = fadd float %514, %513 %516 = fmul float %117, %398 %517 = fadd float %515, %516 %518 = fmul float %492, %477 %519 = fmul float %502, %479 %520 = fadd float %519, %518 %521 = fmul float %512, %481 %522 = fadd float %520, %521 %523 = fmul float %502, %517 %524 = fmul float %512, %497 %525 = fmul float %492, %507 %526 = fmul float %507, %512 %527 = fsub float %526, %523 %528 = fmul float %517, %492 %529 = fsub float %528, %524 %530 = fmul float %497, %502 %531 = fsub float %530, %525 %532 = fmul float %119, %527 %533 = fmul float %119, %529 %534 = fmul float %119, %531 %535 = fmul float %532, %477 %536 = fmul float %533, %479 %537 = fadd float %536, %535 %538 = fmul float %534, %481 %539 = fadd float %537, %538 %540 = fmul float %497, %477 %541 = fmul float %507, %479 %542 = fadd float %541, %540 %543 = fmul float %517, %481 %544 = fadd float %542, %543 %545 = fmul float %66, %53 %546 = fmul float %66, %54 %547 = fmul float %66, %55 %548 = fmul float %49, %65 %549 = fadd float %548, %545 %550 = fmul float %50, %65 %551 = fadd float %550, %546 %552 = fmul float %51, %65 %553 = fadd float %552, %547 %554 = fmul float %56, %67 %555 = fadd float %554, %549 %556 = fmul float %57, %67 %557 = fadd float %556, %551 %558 = fmul float %58, %67 %559 = fadd float %558, %553 %560 = fmul float %492, %555 %561 = fmul float %502, %557 %562 = fadd float %561, %560 %563 = fmul float %512, %559 %564 = fadd float %562, %563 %565 = fmul float %532, %555 %566 = fmul float %533, %557 %567 = fadd float %566, %565 %568 = fmul float %534, %559 %569 = fadd float %567, %568 %570 = fmul float %497, %555 %571 = fmul float %507, %557 %572 = fadd float %571, %570 %573 = fmul float %517, %559 %574 = fadd float %572, %573 %575 = fmul float %492, %56 %576 = fmul float %502, %57 %577 = fadd float %576, %575 %578 = fmul float %512, %58 %579 = fadd float %577, %578 %580 = fmul float %502, %37 %581 = fmul float %502, %39 %582 = fmul float %33, %492 %583 = fadd float %582, %580 %584 = fmul float %35, %492 %585 = fadd float %584, %581 %586 = fmul float %41, %512 %587 = fadd float %586, %583 %588 = fmul float %43, %512 %589 = fadd float %588, %585 %590 = fmul float %532, %56 %591 = fmul float %533, %57 %592 = fadd float %591, %590 %593 = fmul float %534, %58 %594 = fadd float %592, %593 %595 = fmul float %497, %56 %596 = fmul float %507, %57 %597 = fadd float %596, %595 %598 = fmul float %517, %58 %599 = fadd float %597, %598 %600 = fmul float %507, %37 %601 = fmul float %507, %39 %602 = fmul float %33, %497 %603 = fadd float %602, %600 %604 = fmul float %35, %497 %605 = fadd float %604, %601 %606 = fmul float %41, %517 %607 = fadd float %606, %603 %608 = fmul float %43, %517 %609 = fadd float %608, %605 %610 = fmul float %533, %37 %611 = fmul float %533, %39 %612 = fmul float %33, %532 %613 = fadd float %612, %610 %614 = fmul float %35, %532 %615 = fadd float %614, %611 %616 = fmul float %41, %534 %617 = fadd float %616, %613 %618 = fmul float %43, %534 %619 = fadd float %618, %615 %620 = fmul float %458, %17 %621 = fmul float %458, %18 %622 = fmul float %458, %19 %623 = fmul float %458, %20 %624 = fmul float %13, %457 %625 = fadd float %624, %620 %626 = fmul float %14, %457 %627 = fadd float %626, %621 %628 = fmul float %15, %457 %629 = fadd float %628, %622 %630 = fmul float %16, %457 %631 = fadd float %630, %623 %632 = fmul float %21, %459 %633 = fadd float %632, %625 %634 = fmul float %22, %459 %635 = fadd float %634, %627 %636 = fmul float %23, %459 %637 = fadd float %636, %629 %638 = fmul float %24, %459 %639 = fadd float %638, %631 %640 = fmul float %25, %460 %641 = fadd float %640, %633 %642 = fmul float %26, %460 %643 = fadd float %642, %635 %644 = fmul float %27, %460 %645 = fadd float %644, %637 %646 = fmul float %28, %460 %647 = fadd float %646, %639 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %587, float %617, float %607, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %589, float %619, float %609, float %120) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %110, float %111, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %564, float %569, float %574, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %641, float %643, float %645, float %647) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %522, float %539, float %544, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %579, float %594, float %599, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %641, float %643, float %645, float %647) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.round.nearest.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v3, 0x3c008081 ; 7E0602FF 3C008081 v_mov_b32_e32 v1, 0 ; 7E020280 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_movk_i32 s33, 0x74 ; B0210074 s_movk_i32 s34, 0x70 ; B0220070 s_load_dwordx4 s[36:39], s[8:9], 0x0 ; C0920900 s_movk_i32 s35, 0x64 ; B0230064 s_movk_i32 s40, 0x78 ; B0280078 s_movk_i32 s28, 0x60 ; B01C0060 s_movk_i32 s41, 0x84 ; B0290084 s_movk_i32 s32, 0x7c ; B020007C s_movk_i32 s27, 0x68 ; B01B0068 s_movk_i32 s31, 0x80 ; B01F0080 s_movk_i32 s26, 0x6c ; B01A006C s_movk_i32 s30, 0x88 ; B01E0088 s_movk_i32 s29, 0x8c ; B01D008C s_movk_i32 s0, 0xee4 ; B0000EE4 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[4:7], s0 ; C2000400 s_movk_i32 s1, 0xef4 ; B0010EF4 s_buffer_load_dword s42, s[4:7], s1 ; C2150401 s_movk_i32 s1, 0xee0 ; B0010EE0 s_buffer_load_dword s1, s[4:7], s1 ; C2008401 s_movk_i32 s2, 0xef0 ; B0020EF0 s_buffer_load_dword s43, s[4:7], s2 ; C2158402 s_movk_i32 s2, 0xee8 ; B0020EE8 s_buffer_load_dword s2, s[4:7], s2 ; C2010402 s_movk_i32 s3, 0xef8 ; B0030EF8 s_buffer_load_dword s44, s[4:7], s3 ; C2160403 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s0 ; 7E080200 s_movk_i32 s0, 0xe84 ; B0000E84 s_buffer_load_dword s14, s[4:7], s0 ; C2070400 s_movk_i32 s0, 0xe74 ; B0000E74 s_buffer_load_dword s13, s[4:7], s0 ; C2068400 v_mov_b32_e32 v5, s1 ; 7E0A0201 s_movk_i32 s0, 0xe94 ; B0000E94 s_buffer_load_dword s1, s[4:7], s0 ; C2008400 s_movk_i32 s0, 0xe80 ; B0000E80 s_buffer_load_dword s16, s[4:7], s0 ; C2080400 v_mov_b32_e32 v6, s2 ; 7E0C0202 s_movk_i32 s0, 0xea4 ; B0000EA4 s_buffer_load_dword s2, s[4:7], s0 ; C2010400 s_movk_i32 s0, 0xe70 ; B0000E70 s_buffer_load_dword s15, s[4:7], s0 ; C2078400 s_movk_i32 s0, 0xe88 ; B0000E88 s_buffer_load_dword s18, s[4:7], s0 ; C2090400 s_buffer_load_dword s0, s[4:7], 0xf ; C200050F s_movk_i32 s3, 0xe78 ; B0030E78 s_buffer_load_dword s17, s[4:7], s3 ; C2088403 s_movk_i32 s3, 0xe90 ; B0030E90 s_buffer_load_dword s11, s[4:7], s3 ; C2058403 s_movk_i32 s3, 0xec0 ; B0030EC0 s_buffer_load_dword s3, s[4:7], s3 ; C2018403 s_movk_i32 s10, 0xf04 ; B00A0F04 s_buffer_load_dword s23, s[4:7], s10 ; C20B840A s_movk_i32 s10, 0xe98 ; B00A0E98 s_buffer_load_dword s12, s[4:7], s10 ; C206040A s_movk_i32 s10, 0xea0 ; B00A0EA0 s_buffer_load_dword s10, s[4:7], s10 ; C205040A s_movk_i32 s19, 0xeb0 ; B0130EB0 s_buffer_load_dword s19, s[4:7], s19 ; C2098413 s_movk_i32 s20, 0xec4 ; B0140EC4 s_buffer_load_dword s21, s[4:7], s20 ; C20A8414 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s3 ; 7E0E0203 s_movk_i32 s20, 0xf00 ; B0140F00 s_buffer_load_dword s24, s[4:7], s20 ; C20C0414 v_mul_f32_e32 v7, s23, v7 ; 100E0E17 s_movk_i32 s20, 0xeb4 ; B0140EB4 s_buffer_load_dword s20, s[4:7], s20 ; C20A0414 s_movk_i32 s22, 0xec8 ; B0160EC8 s_buffer_load_dword s22, s[4:7], s22 ; C20B0416 s_movk_i32 s25, 0xebc ; B0190EBC s_buffer_load_dword s25, s[4:7], s25 ; C20C8419 s_load_dwordx4 s[48:51], s[8:9], 0x4 ; C0980904 s_load_dwordx4 s[52:55], s[8:9], 0x8 ; C09A0908 s_load_dwordx4 s[56:59], s[8:9], 0xc ; C09C090C s_load_dwordx4 s[60:63], s[8:9], 0x10 ; C09E0910 buffer_load_format_xyzw v[8:11], v0, s[36:39], 0 idxen ; E00C2000 80090800 s_load_dwordx4 s[36:39], s[8:9], 0x14 ; C0920914 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[11:14], v0, s[48:51], 0 idxen ; E00C2000 800C0B00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[14:17], v0, s[52:55], 0 idxen ; E00C2000 800D0E00 buffer_load_format_xyzw v[18:21], v0, s[56:59], 0 idxen ; E00C2000 800E1200 buffer_load_format_xyzw v[22:25], v0, s[60:63], 0 idxen ; E00C2000 800F1600 buffer_load_format_xyzw v[26:29], v0, s[36:39], 0 idxen ; E00C2000 80091A00 v_mad_f32 v0, s42, v9, v4 ; D2820000 0412122A v_mad_f32 v4, s43, v8, v5 ; D2820004 0416102B v_mad_f32 v5, s44, v10, v6 ; D2820005 041A142C v_mad_f32 v6, v11, v3, -1.0 ; D2820006 03CE070B s_waitcnt vmcnt(3) ; BF8C0773 v_mad_f32 v8, v14, v3, -1.0 ; D2820008 03CE070E s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v9, 0x40400000, v19 ; 101226FF 40400000 v_rndne_f32_e32 v9, v9 ; 7E124709 v_cvt_i32_f32_e32 v9, v9 ; 7E121109 v_mul_f32_e32 v10, 0x40400000, v18 ; 101424FF 40400000 v_rndne_f32_e32 v10, v10 ; 7E14470A v_cvt_i32_f32_e32 v10, v10 ; 7E14110A v_lshlrev_b32_e32 v9, 4, v9 ; 34121284 v_mul_f32_e32 v11, 0x40400000, v20 ; 101628FF 40400000 v_mul_f32_e32 v14, 0x40400000, v21 ; 101C2AFF 40400000 v_rndne_f32_e32 v11, v11 ; 7E16470B v_rndne_f32_e32 v14, v14 ; 7E1C470E v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_cvt_i32_f32_e32 v14, v14 ; 7E1C110E v_lshlrev_b32_e32 v10, 4, v10 ; 34141484 v_add_i32_e32 v18, s33, v9 ; 4A241221 buffer_load_dword v18, v18, s[4:7], 0 offen ; E0301000 80011212 v_lshlrev_b32_e32 v11, 4, v11 ; 34161684 v_lshlrev_b32_e32 v14, 4, v14 ; 341C1C84 v_add_i32_e32 v19, s34, v9 ; 4A261222 buffer_load_dword v19, v19, s[4:7], 0 offen ; E0301000 80011313 v_add_i32_e32 v20, s33, v10 ; 4A281421 buffer_load_dword v20, v20, s[4:7], 0 offen ; E0301000 80011414 v_add_i32_e32 v21, s35, v9 ; 4A2A1223 buffer_load_dword v21, v21, s[4:7], 0 offen ; E0301000 80011515 v_mad_f32 v15, v15, v3, -1.0 ; D282000F 03CE070F v_mad_f32 v12, v12, v3, -1.0 ; D282000C 03CE070C v_mad_f32 v13, v13, v3, -1.0 ; D282000D 03CE070D s_waitcnt vmcnt(4) ; BF8C0774 v_add_i32_e32 v28, s40, v9 ; 4A381228 buffer_load_dword v28, v28, s[4:7], 0 offen ; E0301000 80011C1C v_mad_f32 v16, v16, v3, -1.0 ; D2820010 03CE0710 v_mad_f32 v3, v17, v3, -1.0 ; D2820003 03CE0711 v_add_i32_e32 v17, s34, v10 ; 4A221422 buffer_load_dword v17, v17, s[4:7], 0 offen ; E0301000 80011111 v_add_i32_e32 v29, s33, v11 ; 4A3A1621 buffer_load_dword v29, v29, s[4:7], 0 offen ; E0301000 80011D1D v_add_i32_e32 v30, s28, v9 ; 4A3C121C buffer_load_dword v30, v30, s[4:7], 0 offen ; E0301000 80011E1E v_add_i32_e32 v31, s33, v14 ; 4A3E1C21 buffer_load_dword v31, v31, s[4:7], 0 offen ; E0301000 80011F1F v_add_i32_e32 v32, s41, v9 ; 4A401229 buffer_load_dword v32, v32, s[4:7], 0 offen ; E0301000 80012020 v_add_i32_e32 v33, s35, v10 ; 4A421423 buffer_load_dword v33, v33, s[4:7], 0 offen ; E0301000 80012121 v_add_i32_e32 v34, s32, v9 ; 4A441220 buffer_load_dword v34, v34, s[4:7], 0 offen ; E0301000 80012222 v_add_i32_e32 v35, s40, v10 ; 4A461428 buffer_load_dword v35, v35, s[4:7], 0 offen ; E0301000 80012323 v_add_i32_e32 v36, s34, v11 ; 4A481622 buffer_load_dword v36, v36, s[4:7], 0 offen ; E0301000 80012424 v_add_i32_e32 v37, s27, v9 ; 4A4A121B buffer_load_dword v37, v37, s[4:7], 0 offen ; E0301000 80012525 v_add_i32_e32 v38, s31, v9 ; 4A4C121F buffer_load_dword v38, v38, s[4:7], 0 offen ; E0301000 80012626 v_add_i32_e32 v39, s41, v10 ; 4A4E1429 buffer_load_dword v39, v39, s[4:7], 0 offen ; E0301000 80012727 v_add_i32_e32 v40, s28, v10 ; 4A50141C buffer_load_dword v40, v40, s[4:7], 0 offen ; E0301000 80012828 v_add_i32_e32 v41, s35, v11 ; 4A521623 buffer_load_dword v41, v41, s[4:7], 0 offen ; E0301000 80012929 v_add_i32_e32 v42, s32, v10 ; 4A541420 buffer_load_dword v42, v42, s[4:7], 0 offen ; E0301000 80012A2A v_add_i32_e32 v43, s40, v11 ; 4A561628 buffer_load_dword v43, v43, s[4:7], 0 offen ; E0301000 80012B2B v_add_i32_e32 v44, s34, v14 ; 4A581C22 buffer_load_dword v44, v44, s[4:7], 0 offen ; E0301000 80012C2C v_add_i32_e32 v45, s26, v9 ; 4A5A121A buffer_load_dword v45, v45, s[4:7], 0 offen ; E0301000 80012D2D v_add_i32_e32 v46, s30, v9 ; 4A5C121E buffer_load_dword v46, v46, s[4:7], 0 offen ; E0301000 80012E2E v_add_i32_e32 v9, s29, v9 ; 4A12121D buffer_load_dword v9, v9, s[4:7], 0 offen ; E0301000 80010909 v_add_i32_e32 v47, s31, v10 ; 4A5E141F buffer_load_dword v47, v47, s[4:7], 0 offen ; E0301000 80012F2F v_add_i32_e32 v48, s27, v10 ; 4A60141B buffer_load_dword v48, v48, s[4:7], 0 offen ; E0301000 80013030 v_add_i32_e32 v49, s28, v11 ; 4A62161C buffer_load_dword v49, v49, s[4:7], 0 offen ; E0301000 80013131 v_add_i32_e32 v50, s41, v11 ; 4A641629 buffer_load_dword v50, v50, s[4:7], 0 offen ; E0301000 80013232 v_add_i32_e32 v51, s35, v14 ; 4A661C23 v_add_i32_e32 v52, s32, v11 ; 4A681620 buffer_load_dword v52, v52, s[4:7], 0 offen ; E0301000 80013434 v_add_i32_e32 v53, s40, v14 ; 4A6A1C28 v_add_i32_e32 v54, s30, v10 ; 4A6C141E buffer_load_dword v54, v54, s[4:7], 0 offen ; E0301000 80013636 v_add_i32_e32 v55, s26, v10 ; 4A6E141A v_add_i32_e32 v10, s29, v10 ; 4A14141D buffer_load_dword v55, v55, s[4:7], 0 offen ; E0301000 80013737 buffer_load_dword v10, v10, s[4:7], 0 offen ; E0301000 80010A0A v_add_i32_e32 v56, s27, v11 ; 4A70161B buffer_load_dword v56, v56, s[4:7], 0 offen ; E0301000 80013838 v_add_i32_e32 v57, s31, v11 ; 4A72161F buffer_load_dword v57, v57, s[4:7], 0 offen ; E0301000 80013939 v_add_i32_e32 v58, s41, v14 ; 4A741C29 v_add_i32_e32 v59, s26, v11 ; 4A76161A v_add_i32_e32 v60, s30, v11 ; 4A78161E v_add_i32_e32 v11, s29, v11 ; 4A16161D buffer_load_dword v59, v59, s[4:7], 0 offen ; E0301000 80013B3B buffer_load_dword v60, v60, s[4:7], 0 offen ; E0301000 80013C3C buffer_load_dword v11, v11, s[4:7], 0 offen ; E0301000 80010B0B v_add_i32_e32 v61, s32, v14 ; 4A7A1C20 v_add_i32_e32 v62, s31, v14 ; 4A7C1C1F v_add_i32_e32 v63, s30, v14 ; 4A7E1C1E v_add_i32_e32 v64, s29, v14 ; 4A801C1D v_add_i32_e32 v65, s28, v14 ; 4A821C1C v_add_i32_e32 v66, s27, v14 ; 4A841C1B v_add_i32_e32 v14, s26, v14 ; 4A1C1C1A buffer_load_dword v53, v53, s[4:7], 0 offen ; E0301000 80013535 buffer_load_dword v61, v61, s[4:7], 0 offen ; E0301000 80013D3D buffer_load_dword v62, v62, s[4:7], 0 offen ; E0301000 80013E3E buffer_load_dword v58, v58, s[4:7], 0 offen ; E0301000 80013A3A buffer_load_dword v63, v63, s[4:7], 0 offen ; E0301000 80013F3F buffer_load_dword v64, v64, s[4:7], 0 offen ; E0301000 80014040 buffer_load_dword v65, v65, s[4:7], 0 offen ; E0301000 80014141 buffer_load_dword v51, v51, s[4:7], 0 offen ; E0301000 80013333 buffer_load_dword v66, v66, s[4:7], 0 offen ; E0301000 80014242 buffer_load_dword v14, v14, s[4:7], 0 offen ; E0301000 80010E0E s_waitcnt ; BF8C077F v_mul_f32_e32 v19, v19, v23 ; 10262F13 v_mad_f32 v17, v22, v17, v19 ; D2820011 044E2316 v_mul_f32_e32 v18, v18, v23 ; 10242F12 v_mad_f32 v18, v22, v20, v18 ; D2820012 044A2916 v_mul_f32_e32 v19, v28, v23 ; 10262F1C v_mad_f32 v19, v22, v35, v19 ; D2820013 044E4716 v_mul_f32_e32 v20, v34, v23 ; 10282F22 v_mad_f32 v20, v22, v42, v20 ; D2820014 04525516 v_mul_f32_e32 v28, v38, v23 ; 10382F26 v_mad_f32 v28, v22, v47, v28 ; D282001C 04725F16 v_mul_f32_e32 v32, v32, v23 ; 10402F20 v_mad_f32 v32, v22, v39, v32 ; D2820020 04824F16 v_mul_f32_e32 v34, v46, v23 ; 10442F2E v_mad_f32 v34, v22, v54, v34 ; D2820022 048A6D16 v_mul_f32_e32 v9, v9, v23 ; 10122F09 v_mad_f32 v9, v22, v10, v9 ; D2820009 04261516 v_mul_f32_e32 v10, v30, v23 ; 10142F1E v_mad_f32 v10, v22, v40, v10 ; D282000A 042A5116 v_mul_f32_e32 v21, v21, v23 ; 102A2F15 v_mad_f32 v21, v22, v33, v21 ; D2820015 04564316 v_mul_f32_e32 v30, v37, v23 ; 103C2F25 v_mad_f32 v30, v22, v48, v30 ; D282001E 047A6116 v_mul_f32_e32 v23, v45, v23 ; 102E2F2D v_mad_f32 v22, v22, v55, v23 ; D2820016 045E6F16 v_mad_f32 v17, v24, v36, v17 ; D2820011 04464918 v_mad_f32 v18, v24, v29, v18 ; D2820012 044A3B18 v_mad_f32 v19, v24, v43, v19 ; D2820013 044E5718 v_mad_f32 v20, v24, v52, v20 ; D2820014 04526918 v_mad_f32 v10, v24, v49, v10 ; D282000A 042A6318 v_mad_f32 v21, v24, v41, v21 ; D2820015 04565318 s_waitcnt vmcnt(14) ; BF8C077E v_mad_f32 v23, v24, v56, v30 ; D2820017 047A7118 s_waitcnt vmcnt(12) ; BF8C077C v_mad_f32 v22, v24, v59, v22 ; D2820016 045A7718 v_mad_f32 v28, v24, v57, v28 ; D282001C 04727318 v_mad_f32 v29, v24, v50, v32 ; D282001D 04826518 s_waitcnt vmcnt(11) ; BF8C077B v_mad_f32 v30, v24, v60, v34 ; D282001E 048A7918 s_waitcnt vmcnt(10) ; BF8C077A v_mad_f32 v9, v24, v11, v9 ; D2820009 04261718 v_mad_f32 v11, v25, v44, v17 ; D282000B 04465919 v_mad_f32 v17, v25, v31, v18 ; D2820011 044A3F19 s_waitcnt vmcnt(9) ; BF8C0779 v_mad_f32 v18, v25, v53, v19 ; D2820012 044E6B19 s_waitcnt vmcnt(8) ; BF8C0778 v_mad_f32 v19, v25, v61, v20 ; D2820013 04527B19 s_waitcnt vmcnt(7) ; BF8C0777 v_mad_f32 v20, v25, v62, v28 ; D2820014 04727D19 s_waitcnt vmcnt(6) ; BF8C0776 v_mad_f32 v24, v25, v58, v29 ; D2820018 04767519 s_waitcnt vmcnt(5) ; BF8C0775 v_mad_f32 v28, v25, v63, v30 ; D282001C 047A7F19 s_waitcnt vmcnt(4) ; BF8C0774 v_mad_f32 v9, v25, v64, v9 ; D2820009 04268119 s_waitcnt vmcnt(3) ; BF8C0773 v_mad_f32 v10, v25, v65, v10 ; D282000A 042A8319 s_waitcnt vmcnt(2) ; BF8C0772 v_mad_f32 v21, v25, v51, v21 ; D2820015 04566719 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v23, v25, v66, v23 ; D2820017 045E8519 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v25, v14, v22 ; D282000E 045A1D19 v_mul_f32_e32 v22, v10, v6 ; 102C0D0A v_mul_f32_e32 v25, v11, v6 ; 10320D0B v_mul_f32_e32 v6, v20, v6 ; 100C0D14 v_mad_f32 v22, v12, v21, v22 ; D2820016 045A2B0C v_mad_f32 v25, v12, v17, v25 ; D2820019 0466230C v_mad_f32 v6, v12, v24, v6 ; D2820006 041A310C v_mul_f32_e32 v12, v10, v8 ; 1018110A v_mul_f32_e32 v29, v11, v8 ; 103A110B v_mul_f32_e32 v8, v20, v8 ; 10101114 v_mad_f32 v12, v15, v21, v12 ; D282000C 04322B0F v_mad_f32 v29, v15, v17, v29 ; D282001D 0476230F v_mad_f32 v8, v15, v24, v8 ; D2820008 0422310F v_mad_f32 v15, v13, v23, v22 ; D282000F 045A2F0D v_mad_f32 v22, v13, v18, v25 ; D2820016 0466250D v_mad_f32 v6, v13, v28, v6 ; D2820006 041A390D v_mad_f32 v12, v16, v23, v12 ; D282000C 04322F10 v_mad_f32 v13, v16, v18, v29 ; D282000D 04762510 v_mad_f32 v8, v16, v28, v8 ; D2820008 04223910 v_mul_f32_e32 v16, v8, v22 ; 10202D08 v_mad_f32 v16, v13, v6, -v16 ; D2820010 84420D0D v_mul_f32_e32 v25, v12, v6 ; 10320D0C v_mad_f32 v25, v8, v15, -v25 ; D2820019 84661F08 v_mul_f32_e32 v29, v13, v15 ; 103A1F0D v_mad_f32 v29, v12, v22, -v29 ; D282001D 84762D0C v_mul_f32_e32 v30, s16, v22 ; 103C2C10 v_mad_f32 v30, s15, v15, v30 ; D282001E 047A1E0F v_mul_f32_e32 v31, s16, v13 ; 103E1A10 v_mad_f32 v31, s15, v12, v31 ; D282001F 047E180F v_mul_f32_e32 v16, v16, v3 ; 10200710 v_mul_f32_e32 v25, v25, v3 ; 10320719 v_mul_f32_e32 v29, v29, v3 ; 103A071D v_mad_f32 v30, s11, v6, v30 ; D282001E 047A0C0B v_mad_f32 v31, s11, v8, v31 ; D282001F 047E100B v_mul_f32_e32 v32, s16, v25 ; 10403210 v_mad_f32 v32, s15, v16, v32 ; D2820020 0482200F v_mad_f32 v32, s11, v29, v32 ; D2820020 04823A0B exp 15, 32, 0, 0, 0, v30, v32, v31, v1 ; F800020F 011F201E v_mul_f32_e32 v3, s25, v3 ; 10060619 s_movk_i32 s8, 0xeb8 ; B0080EB8 s_buffer_load_dword s8, s[4:7], s8 ; C2040408 s_movk_i32 s9, 0xed0 ; B0090ED0 s_buffer_load_dword s9, s[4:7], s9 ; C2048409 s_movk_i32 s25, 0xed4 ; B0190ED4 s_buffer_load_dword s25, s[4:7], s25 ; C20C8419 s_movk_i32 s26, 0xed8 ; B01A0ED8 s_buffer_load_dword s26, s[4:7], s26 ; C20D041A s_movk_i32 s27, 0xf08 ; B01B0F08 s_buffer_load_dword s27, s[4:7], s27 ; C20D841B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v30, s18, v22 ; 103C2C12 v_mad_f32 v30, s17, v15, v30 ; D282001E 047A1E11 v_mul_f32_e32 v31, s18, v13 ; 103E1A12 v_mad_f32 v31, s17, v12, v31 ; D282001F 047E1811 v_mad_f32 v30, s12, v6, v30 ; D282001E 047A0C0C v_mad_f32 v31, s12, v8, v31 ; D282001F 047E100C v_mul_f32_e32 v32, s18, v25 ; 10403212 v_mad_f32 v32, s17, v16, v32 ; D2820020 04822011 v_mad_f32 v32, s12, v29, v32 ; D2820020 04823A0C exp 15, 33, 0, 0, 0, v30, v32, v31, v3 ; F800021F 031F201E exp 15, 34, 0, 0, 0, v26, v27, v1, v1 ; F800022F 01011B1A s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v3, s24 ; 7E060218 v_mad_f32 v3, v3, s19, v7 ; D2820003 041C2703 v_mov_b32_e32 v7, s21 ; 7E0E0215 v_mul_f32_e32 v7, s23, v7 ; 100E0E17 v_mov_b32_e32 v26, s24 ; 7E340218 v_mad_f32 v7, v26, s20, v7 ; D2820007 041C291A v_mov_b32_e32 v26, s22 ; 7E340216 v_mul_f32_e32 v26, s23, v26 ; 10343417 v_mov_b32_e32 v27, s24 ; 7E360218 v_mad_f32 v26, v27, s8, v26 ; D282001A 0468111B v_mov_b32_e32 v27, s27 ; 7E36021B v_mad_f32 v3, v27, s9, v3 ; D2820003 040C131B v_mov_b32_e32 v27, s27 ; 7E36021B v_mad_f32 v7, v27, s25, v7 ; D2820007 041C331B v_mov_b32_e32 v27, s27 ; 7E36021B v_mad_f32 v26, v27, s26, v26 ; D282001A 0468351B v_mul_f32_e32 v27, v3, v15 ; 10361F03 v_mul_f32_e32 v30, v3, v16 ; 103C2103 v_mul_f32_e32 v3, v3, v12 ; 10061903 v_mad_f32 v27, v22, v7, v27 ; D282001B 046E0F16 v_mad_f32 v30, v25, v7, v30 ; D282001E 047A0F19 v_mad_f32 v3, v13, v7, v3 ; D2820003 040E0F0D v_mad_f32 v7, v6, v26, v27 ; D2820007 046E3506 v_mad_f32 v27, v29, v26, v30 ; D282001B 047A351D v_mad_f32 v3, v8, v26, v3 ; D2820003 040E3508 exp 15, 35, 0, 0, 0, v7, v27, v3, v1 ; F800023F 01031B07 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v17, v0 ; 10060111 v_mad_f32 v3, v4, v11, v3 ; D2820003 040E1704 v_mul_f32_e32 v7, v21, v0 ; 100E0115 v_mad_f32 v7, v4, v10, v7 ; D2820007 041E1504 s_movk_i32 s23, 0xe8c ; B0170E8C s_buffer_load_dword s23, s[4:7], s23 ; C20B8417 v_mad_f32 v3, v5, v18, v3 ; D2820003 040E2505 v_mad_f32 v7, v5, v23, v7 ; D2820007 041E2F05 s_movk_i32 s24, 0xe7c ; B0180E7C s_buffer_load_dword s24, s[4:7], s24 ; C20C0418 v_add_f32_e32 v3, v19, v3 ; 06060713 v_add_f32_e32 v7, v14, v7 ; 060E0F0E v_mul_f32_e32 v10, s16, v3 ; 10140610 v_mul_f32_e32 v11, s14, v3 ; 1016060E v_mul_f32_e32 v14, s18, v3 ; 101C0612 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s23, v3 ; 10060617 v_mad_f32 v10, s15, v7, v10 ; D282000A 042A0E0F v_mad_f32 v11, s13, v7, v11 ; D282000B 042E0E0D v_mad_f32 v14, s17, v7, v14 ; D282000E 043A0E11 v_mad_f32 v3, s24, v7, v3 ; D2820003 040E0E18 s_movk_i32 s13, 0xe9c ; B00D0E9C s_buffer_load_dword s13, s[4:7], s13 ; C206840D s_movk_i32 s14, 0xea8 ; B00E0EA8 s_buffer_load_dword s14, s[4:7], s14 ; C207040E s_movk_i32 s15, 0xeac ; B00F0EAC s_buffer_load_dword s15, s[4:7], s15 ; C207840F s_buffer_load_dword s16, s[4:7], 0x0 ; C2080500 s_buffer_load_dword s17, s[4:7], 0x1 ; C2088501 s_buffer_load_dword s18, s[4:7], 0x2 ; C2090502 s_buffer_load_dword s23, s[4:7], 0x3 ; C20B8503 s_buffer_load_dword s24, s[4:7], 0x4 ; C20C0504 s_buffer_load_dword s27, s[4:7], 0x5 ; C20D8505 s_buffer_load_dword s28, s[4:7], 0x6 ; C20E0506 s_buffer_load_dword s29, s[4:7], 0x7 ; C20E8507 s_buffer_load_dword s30, s[4:7], 0x8 ; C20F0508 s_buffer_load_dword s31, s[4:7], 0x9 ; C20F8509 s_buffer_load_dword s32, s[4:7], 0xa ; C210050A s_buffer_load_dword s33, s[4:7], 0xb ; C210850B s_buffer_load_dword s34, s[4:7], 0xc ; C211050C s_buffer_load_dword s35, s[4:7], 0xd ; C211850D s_buffer_load_dword s36, s[4:7], 0xe ; C212050E s_buffer_load_dword s37, s[4:7], 0x10 ; C2128510 s_buffer_load_dword s38, s[4:7], 0x11 ; C2130511 s_buffer_load_dword s39, s[4:7], 0x12 ; C2138512 s_buffer_load_dword s4, s[4:7], 0x13 ; C2020513 v_mul_f32_e32 v0, v24, v0 ; 10000118 v_mad_f32 v0, v4, v20, v0 ; D2820000 04022904 v_mad_f32 v0, v5, v28, v0 ; D2820000 04023905 v_add_f32_e32 v0, v9, v0 ; 06000109 v_mad_f32 v4, s11, v0, v10 ; D2820004 042A000B v_mad_f32 v5, s1, v0, v11 ; D2820005 042E0001 v_mad_f32 v7, s12, v0, v14 ; D2820007 043A000C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s13, v0, v3 ; D2820000 040E000D v_add_f32_e32 v3, s2, v5 ; 06060A02 v_mov_b32_e32 v5, s38 ; 7E0A0226 v_mad_f32 v5, -s4, v3, v5 ; D2820005 24160604 v_mul_f32_e32 v9, s3, v5 ; 10120A03 v_mul_f32_e32 v10, s21, v5 ; 10140A15 v_mul_f32_e32 v5, s22, v5 ; 100A0A16 v_add_f32_e32 v4, s10, v4 ; 0608080A v_mul_f32_e32 v11, s24, v3 ; 10160618 v_mul_f32_e32 v14, s27, v3 ; 101C061B v_mul_f32_e32 v17, s28, v3 ; 1022061C v_mul_f32_e32 v3, s29, v3 ; 1006061D v_mad_f32 v11, s16, v4, v11 ; D282000B 042E0810 v_mad_f32 v14, s17, v4, v14 ; D282000E 043A0811 v_mad_f32 v17, s18, v4, v17 ; D2820011 04460812 v_mad_f32 v3, s23, v4, v3 ; D2820003 040E0817 v_mov_b32_e32 v18, s37 ; 7E240225 v_mad_f32 v4, -s4, v4, v18 ; D2820004 244A0804 v_mad_f32 v9, s19, v4, v9 ; D2820009 04260813 v_mad_f32 v10, s20, v4, v10 ; D282000A 042A0814 v_mad_f32 v4, s8, v4, v5 ; D2820004 04160808 v_add_f32_e32 v5, s14, v7 ; 060A0E0E v_mov_b32_e32 v7, s39 ; 7E0E0227 v_mad_f32 v7, -s4, v5, v7 ; D2820007 241E0A04 v_mad_f32 v11, s30, v5, v11 ; D282000B 042E0A1E v_mad_f32 v14, s31, v5, v14 ; D282000E 043A0A1F v_mad_f32 v17, s32, v5, v17 ; D2820011 04460A20 v_mad_f32 v3, s33, v5, v3 ; D2820003 040E0A21 v_add_f32_e32 v0, s15, v0 ; 0600000F v_mad_f32 v5, s34, v0, v11 ; D2820005 042E0022 v_mad_f32 v11, s35, v0, v14 ; D282000B 043A0023 v_mad_f32 v14, s36, v0, v17 ; D282000E 04460024 v_mad_f32 v0, s0, v0, v3 ; D2820000 040E0000 v_mad_f32 v3, s9, v7, v9 ; D2820003 04260E09 v_mad_f32 v9, s25, v7, v10 ; D2820009 042A0E19 v_mad_f32 v4, s26, v7, v4 ; D2820004 04120E1A v_mul_f32_e32 v7, v3, v15 ; 100E1F03 v_mul_f32_e32 v10, v3, v16 ; 10142103 v_mul_f32_e32 v3, v3, v12 ; 10061903 v_mad_f32 v7, v22, v9, v7 ; D2820007 041E1316 v_mad_f32 v10, v25, v9, v10 ; D282000A 042A1319 v_mad_f32 v3, v13, v9, v3 ; D2820003 040E130D v_mad_f32 v7, v6, v4, v7 ; D2820007 041E0906 v_mad_f32 v9, v29, v4, v10 ; D2820009 042A091D v_mad_f32 v3, v8, v4, v3 ; D2820003 040E0908 exp 15, 36, 0, 0, 0, v5, v11, v14, v0 ; F800024F 000E0B05 exp 15, 37, 0, 0, 0, v7, v9, v3, v2 ; F800025F 02030907 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s9, v15 ; 10041E09 v_mad_f32 v2, v22, s25, v2 ; D2820002 04083316 v_mul_f32_e32 v3, s9, v12 ; 10061809 v_mad_f32 v3, v13, s25, v3 ; D2820003 040C330D v_mad_f32 v2, v6, s26, v2 ; D2820002 04083506 v_mul_f32_e32 v4, s9, v16 ; 10082009 v_mad_f32 v4, v25, s25, v4 ; D2820004 04103319 v_mad_f32 v4, v29, s26, v4 ; D2820004 0410351D v_mad_f32 v3, v8, s26, v3 ; D2820003 040C3508 exp 15, 38, 0, 0, 0, v2, v4, v3, v1 ; F800026F 01030402 exp 15, 12, 0, 1, 0, v5, v11, v14, v0 ; F80008CF 000E0B05 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 68 Code Size: 2716 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[4], PERSPECTIVE DCL IN[4], TEXCOORD[5], PERSPECTIVE DCL IN[5], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..17] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..7] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.8000, 3.0000, -1.0000, 0.8581} IMM[2] FLT32 { -0.0000, 0.3000, 0.5900, 0.1100} IMM[3] FLT32 { -1.0233, 1.0233, 0.8862, 15.0000} IMM[4] FLT32 { 0.0039, 0.0000, 340282346638528859811704183484516925440.0000, 0.2477} IMM[5] FLT32 { -0.8581, 0.2477, 0.4290, 0.9151} 0: TEX TEMP[0], IN[2], SAMP[0], 2D 1: MAD TEMP[0].xyz, TEMP[0], IMM[0].xxxx, IMM[0].yyyy 2: DP3 TEMP[1].x, IN[5], IN[5] 3: RSQ TEMP[1].x, TEMP[1].xxxx 4: MIN TEMP[1].x, IMM[4].zzzz, TEMP[1].xxxx 5: MUL TEMP[2].xyz, IN[5], TEMP[1].xxxx 6: DP3 TEMP[0].w, TEMP[0], TEMP[2] 7: MAX TEMP[2].w, TEMP[0].wwww, IMM[0].wwww 8: MOV_SAT TEMP[0].w, TEMP[2].wwww 9: ADD TEMP[2].w, -TEMP[2].wwww, IMM[0].zzzz 10: ADD TEMP[3].x, TEMP[0].wwww, IMM[2].xxxx 11: POW TEMP[3].y, |TEMP[0].wwww|, CONST[14].zzzz 12: CMP TEMP[0].w, TEMP[3].xxxx, IMM[0].wwww, TEMP[3].yyyy 13: MUL TEMP[3].xyz, TEMP[0].wwww, CONST[13] 14: TEX TEMP[4], IN[2], SAMP[2], 2D 15: DP3 TEMP[3].w, TEMP[4], IMM[2].yzww 16: LRP TEMP[5].xyz, CONST[14].yyyy, TEMP[3].wwww, TEMP[4] 17: LRP TEMP[6].xyz, CONST[14].wwww, TEMP[3].wwww, TEMP[4] 18: ADD TEMP[6].xyz, TEMP[6], TEMP[6] 19: MAX TEMP[7].xyz, |TEMP[6]|, -IMM[2].xxxx 20: MAD TEMP[3].xyz, TEMP[5], TEMP[3], -TEMP[4] 21: TEX TEMP[5], IN[2], SAMP[1], 2D 22: MUL TEMP[0].w, TEMP[0].wwww, TEMP[5].yyyy 23: MAD TEMP[3].xyz, TEMP[0].wwww, TEMP[3], TEMP[4] 24: MAD TEMP[5].xyw, TEMP[7].xyzz, TEMP[7].xyzz, -TEMP[3].xyzz 25: DP3 TEMP[1].x, TEMP[0], TEMP[0] 26: RSQ TEMP[1].x, TEMP[1].xxxx 27: MIN TEMP[1].x, IMM[4].zzzz, TEMP[1].xxxx 28: MUL TEMP[6].xyz, TEMP[0], TEMP[1].xxxx 29: DP3 TEMP[0].w, TEMP[6], TEMP[2] 30: MUL TEMP[7].xyz, TEMP[0].wwww, TEMP[6] 31: MAD TEMP[2].xyz, TEMP[7], IMM[0].xxxx, -TEMP[2] 32: DP3 TEMP[0].x, TEMP[2], TEMP[0] 33: MAX TEMP[3].w, TEMP[0].xxxx, IMM[0].wwww 34: MIN TEMP[0].x, TEMP[3].wwww, IMM[1].xxxx 35: MUL TEMP[0].x, TEMP[0].xxxx, CONST[15].xxxx 36: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].zzzz 37: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[5].xyww, TEMP[3] 38: MUL TEMP[0].w, |TEMP[2].wwww|, |TEMP[2].wwww| 39: MUL TEMP[0].w, TEMP[0].wwww, |TEMP[2].wwww| 40: ADD TEMP[2].w, |TEMP[2].wwww|, IMM[2].xxxx 41: MUL TEMP[0].w, TEMP[0].wwww, CONST[14].xxxx 42: MUL TEMP[0].w, TEMP[5].zzzz, TEMP[0].wwww 43: MUL TEMP[3].xyz, TEMP[4], TEMP[0].wwww 44: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3] 45: ADD TEMP[0].xyz, TEMP[0], TEMP[3] 46: DP3 TEMP[0].w, TEMP[0], IMM[2].yzww 47: LRP TEMP[3].xyz, CONST[15].yyyy, TEMP[0].wwww, TEMP[0] 48: MUL TEMP[0].xyz, TEMP[3], CONST[15].zzzz 49: MOV TEMP[3].z, IMM[0].zzzz 50: ADD TEMP[3].xyz, TEMP[3].zzzz, -CONST[0] 51: MUL TEMP[0].xyz, TEMP[0], TEMP[3] 52: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 53: DP3 TEMP[1].x, IN[1], IN[1] 54: RSQ TEMP[1].x, TEMP[1].xxxx 55: MIN TEMP[1].x, IMM[4].zzzz, TEMP[1].xxxx 56: MUL TEMP[3].xyz, IN[1], TEMP[1].xxxx 57: DP3 TEMP[1].x, IN[0], IN[0] 58: RSQ TEMP[1].x, TEMP[1].xxxx 59: MIN TEMP[1].x, IMM[4].zzzz, TEMP[1].xxxx 60: MUL TEMP[4].xyz, IN[0], TEMP[1].xxxx 61: MUL TEMP[5].xyz, TEMP[3].zxyw, TEMP[4].yzxw 62: MAD TEMP[5].xyz, TEMP[3].yzxw, TEMP[4].zxyw, -TEMP[5] 63: DP3 TEMP[3].y, TEMP[3], TEMP[6] 64: DP3 TEMP[3].z, TEMP[4], TEMP[6] 65: MUL TEMP[4].xyz, TEMP[5], IN[1].wwww 66: DP3 TEMP[3].x, TEMP[4], TEMP[6] 67: MUL TEMP[4].xz, TEMP[3], TEMP[3].yyyy 68: MUL TEMP[5].xyz, TEMP[3], TEMP[3].xyxw 69: MUL TEMP[7].xyz, TEMP[3], IMM[3].xyxw 70: MAD TEMP[4].w, TEMP[3].zzzz, TEMP[3].zzzz, -TEMP[5].xxxx 71: MAD TEMP[4].y, TEMP[5].yyyy, IMM[1].yyyy, IMM[1].zzzz 72: MUL TEMP[7].w, TEMP[5].zzzz, IMM[1].wwww 73: MUL TEMP[3], TEMP[4], IMM[5].xyxz 74: DP4 TEMP[0].w, CONST[8], TEMP[3] 75: DP4 TEMP[2].w, CONST[7], TEMP[7] 76: MOV TEMP[4].z, IMM[3].zzzz 77: MAD TEMP[2].w, CONST[6].xxxx, TEMP[4].zzzz, TEMP[2].wwww 78: ADD TEMP[5].x, TEMP[0].wwww, TEMP[2].wwww 79: DP4 TEMP[0].w, CONST[10], TEMP[3] 80: DP4 TEMP[2].w, CONST[12], TEMP[3] 81: DP4 TEMP[3].x, CONST[9], TEMP[7] 82: DP4 TEMP[3].y, CONST[11], TEMP[7] 83: MAD TEMP[3].y, CONST[6].zzzz, TEMP[4].zzzz, TEMP[3].yyyy 84: ADD TEMP[5].z, TEMP[2].wwww, TEMP[3].yyyy 85: MAD TEMP[2].w, CONST[6].yyyy, TEMP[4].zzzz, TEMP[3].xxxx 86: ADD TEMP[5].y, TEMP[0].wwww, TEMP[2].wwww 87: MAX TEMP[3].xyz, TEMP[5], IMM[0].wwww 88: MAD TEMP[3].xyz, TEMP[0], TEMP[3], CONST[0] 89: TEX TEMP[4], IN[2], SAMP[3], 2D 90: MUL TEMP[4].xyz, TEMP[4], CONST[15].wwww 91: MUL TEMP[4].xyz, TEMP[4], CONST[15].zzzz 92: MAD TEMP[4].xyz, TEMP[4], CONST[5].wwww, CONST[5] 93: DP3 TEMP[1].x, IN[3], IN[3] 94: RSQ TEMP[1].x, TEMP[1].xxxx 95: MIN TEMP[1].x, IMM[4].zzzz, TEMP[1].xxxx 96: MUL TEMP[5].xyz, IN[3], TEMP[1].xxxx 97: DP3_SAT TEMP[0].w, TEMP[2], TEMP[5] 98: DP3_SAT TEMP[2].x, TEMP[6], TEMP[5] 99: POW TEMP[2].y, |TEMP[0].wwww|, IMM[3].wwww 100: ADD TEMP[0].w, TEMP[0].wwww, IMM[2].xxxx 101: MUL TEMP[2].y, TEMP[2].yyyy, IMM[5].wwww 102: MUL TEMP[2].yzw, TEMP[4].xxyz, TEMP[2].yyyy 103: CMP TEMP[2].yzw, TEMP[0].wwww, IMM[0].wwww, TEMP[2] 104: ADD TEMP[0].w, TEMP[2].xxxx, IMM[2].xxxx 105: CMP TEMP[0].w, TEMP[0].wwww, IMM[0].wwww, TEMP[2].xxxx 106: MAD TEMP[2].xyz, TEMP[0], TEMP[0].wwww, TEMP[2].yzww 107: MAD TEMP[2].xyz, TEMP[2], CONST[16], TEMP[3] 108: MAD OUT[0].xyz, TEMP[0], CONST[17], TEMP[2] 109: MUL OUT[0].w, IMM[4].xxxx, IN[4].wwww 110: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 252) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %79 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %80 = load <32 x i8>, <32 x i8> addrspace(2)* %79, align 32, !tbaa !0 %81 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %84 = bitcast <8 x i32> addrspace(2)* %83 to <32 x i8> addrspace(2)* %85 = load <32 x i8>, <32 x i8> addrspace(2)* %84, align 32, !tbaa !0 %86 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %87 = bitcast <4 x i32> addrspace(2)* %86 to <16 x i8> addrspace(2)* %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)* %91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0 %92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)* %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 %95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %96 = bitcast <8 x i32> addrspace(2)* %95 to <32 x i8> addrspace(2)* %97 = load <32 x i8>, <32 x i8> addrspace(2)* %96, align 32, !tbaa !0 %98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %99 = bitcast <4 x i32> addrspace(2)* %98 to <16 x i8> addrspace(2)* %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 %101 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %117 = bitcast float %108 to i32 %118 = bitcast float %109 to i32 %119 = insertelement <2 x i32> undef, i32 %117, i32 0 %120 = insertelement <2 x i32> %119, i32 %118, i32 1 %121 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %120, <32 x i8> %80, <16 x i8> %82, i32 2) %122 = extractelement <4 x float> %121, i32 0 %123 = extractelement <4 x float> %121, i32 1 %124 = extractelement <4 x float> %121, i32 2 %125 = fmul float %122, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %123, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %124, 2.000000e+00 %130 = fadd float %129, -1.000000e+00 %131 = fmul float %114, %114 %132 = fmul float %115, %115 %133 = fadd float %132, %131 %134 = fmul float %116, %116 %135 = fadd float %133, %134 %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135) %137 = call float @llvm.minnum.f32(float %136, float 0x47EFFFFFE0000000) %138 = fmul float %114, %137 %139 = fmul float %115, %137 %140 = fmul float %116, %137 %141 = fmul float %126, %138 %142 = fmul float %128, %139 %143 = fadd float %142, %141 %144 = fmul float %130, %140 %145 = fadd float %143, %144 %146 = call float @llvm.maxnum.f32(float %145, float 0.000000e+00) %147 = call float @llvm.AMDIL.clamp.(float %146, float 0.000000e+00, float 1.000000e+00) %148 = fsub float 1.000000e+00, %146 %149 = fadd float %147, 0xBEB0C6F7A0000000 %150 = call float @fabs(float %147) %151 = call float @llvm.pow.f32(float %150, float %67) %152 = call float @llvm.AMDGPU.cndlt(float %149, float 0.000000e+00, float %151) %153 = fmul float %152, %62 %154 = fmul float %152, %63 %155 = fmul float %152, %64 %156 = bitcast float %108 to i32 %157 = bitcast float %109 to i32 %158 = insertelement <2 x i32> undef, i32 %156, i32 0 %159 = insertelement <2 x i32> %158, i32 %157, i32 1 %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %91, <16 x i8> %94, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = extractelement <4 x float> %160, i32 1 %163 = extractelement <4 x float> %160, i32 2 %164 = fmul float %161, 0x3FD3333340000000 %165 = fmul float %162, 0x3FE2E147A0000000 %166 = fadd float %165, %164 %167 = fmul float %163, 0x3FBC28F5C0000000 %168 = fadd float %166, %167 %169 = call float @llvm.AMDGPU.lrp(float %66, float %168, float %161) %170 = call float @llvm.AMDGPU.lrp(float %66, float %168, float %162) %171 = call float @llvm.AMDGPU.lrp(float %66, float %168, float %163) %172 = call float @llvm.AMDGPU.lrp(float %68, float %168, float %161) %173 = call float @llvm.AMDGPU.lrp(float %68, float %168, float %162) %174 = call float @llvm.AMDGPU.lrp(float %68, float %168, float %163) %175 = fadd float %172, %172 %176 = fadd float %173, %173 %177 = fadd float %174, %174 %178 = call float @fabs(float %175) %179 = call float @llvm.maxnum.f32(float %178, float 0x3EB0C6F7A0000000) %180 = call float @fabs(float %176) %181 = call float @llvm.maxnum.f32(float %180, float 0x3EB0C6F7A0000000) %182 = call float @fabs(float %177) %183 = call float @llvm.maxnum.f32(float %182, float 0x3EB0C6F7A0000000) %184 = fmul float %169, %153 %185 = fsub float %184, %161 %186 = fmul float %170, %154 %187 = fsub float %186, %162 %188 = fmul float %171, %155 %189 = fsub float %188, %163 %190 = bitcast float %108 to i32 %191 = bitcast float %109 to i32 %192 = insertelement <2 x i32> undef, i32 %190, i32 0 %193 = insertelement <2 x i32> %192, i32 %191, i32 1 %194 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %193, <32 x i8> %85, <16 x i8> %88, i32 2) %195 = extractelement <4 x float> %194, i32 1 %196 = extractelement <4 x float> %194, i32 2 %197 = fmul float %152, %195 %198 = fmul float %197, %185 %199 = fadd float %198, %161 %200 = fmul float %197, %187 %201 = fadd float %200, %162 %202 = fmul float %197, %189 %203 = fadd float %202, %163 %204 = fmul float %179, %179 %205 = fsub float %204, %199 %206 = fmul float %181, %181 %207 = fsub float %206, %201 %208 = fmul float %183, %183 %209 = fsub float %208, %203 %210 = fmul float %126, %126 %211 = fmul float %128, %128 %212 = fadd float %211, %210 %213 = fmul float %130, %130 %214 = fadd float %212, %213 %215 = call float @llvm.AMDGPU.rsq.clamped.f32(float %214) %216 = call float @llvm.minnum.f32(float %215, float 0x47EFFFFFE0000000) %217 = fmul float %126, %216 %218 = fmul float %128, %216 %219 = fmul float %130, %216 %220 = fmul float %217, %138 %221 = fmul float %218, %139 %222 = fadd float %221, %220 %223 = fmul float %219, %140 %224 = fadd float %222, %223 %225 = fmul float %224, %217 %226 = fmul float %224, %218 %227 = fmul float %224, %219 %228 = fmul float %225, 2.000000e+00 %229 = fsub float %228, %138 %230 = fmul float %226, 2.000000e+00 %231 = fsub float %230, %139 %232 = fmul float %227, 2.000000e+00 %233 = fsub float %232, %140 %234 = fmul float %229, %126 %235 = fmul float %231, %128 %236 = fadd float %235, %234 %237 = fmul float %233, %130 %238 = fadd float %236, %237 %239 = call float @llvm.maxnum.f32(float %238, float 0.000000e+00) %240 = call float @llvm.minnum.f32(float %239, float 0x3FE99999A0000000) %241 = fmul float %240, %69 %242 = fmul float %241, %196 %243 = fmul float %242, %205 %244 = fadd float %243, %199 %245 = fmul float %242, %207 %246 = fadd float %245, %201 %247 = fmul float %242, %209 %248 = fadd float %247, %203 %249 = call float @fabs(float %148) %250 = call float @fabs(float %148) %251 = fmul float %249, %250 %252 = call float @fabs(float %148) %253 = fmul float %251, %252 %254 = call float @fabs(float %148) %255 = fadd float %254, 0xBEB0C6F7A0000000 %256 = fmul float %253, %65 %257 = fmul float %196, %256 %258 = fmul float %161, %257 %259 = fmul float %162, %257 %260 = fmul float %163, %257 %261 = call float @llvm.AMDGPU.cndlt(float %255, float 0.000000e+00, float %258) %262 = call float @llvm.AMDGPU.cndlt(float %255, float 0.000000e+00, float %259) %263 = call float @llvm.AMDGPU.cndlt(float %255, float 0.000000e+00, float %260) %264 = fadd float %244, %261 %265 = fadd float %246, %262 %266 = fadd float %248, %263 %267 = fmul float %264, 0x3FD3333340000000 %268 = fmul float %265, 0x3FE2E147A0000000 %269 = fadd float %268, %267 %270 = fmul float %266, 0x3FBC28F5C0000000 %271 = fadd float %269, %270 %272 = call float @llvm.AMDGPU.lrp(float %70, float %271, float %264) %273 = call float @llvm.AMDGPU.lrp(float %70, float %271, float %265) %274 = call float @llvm.AMDGPU.lrp(float %70, float %271, float %266) %275 = fmul float %272, %71 %276 = fmul float %273, %71 %277 = fmul float %274, %71 %278 = fsub float 1.000000e+00, %24 %279 = fsub float 1.000000e+00, %25 %280 = fsub float 1.000000e+00, %26 %281 = fmul float %275, %278 %282 = fmul float %276, %279 %283 = fmul float %277, %280 %284 = fmul float %281, %30 %285 = fadd float %284, %27 %286 = fmul float %282, %30 %287 = fadd float %286, %28 %288 = fmul float %283, %30 %289 = fadd float %288, %29 %290 = fmul float %104, %104 %291 = fmul float %105, %105 %292 = fadd float %291, %290 %293 = fmul float %106, %106 %294 = fadd float %292, %293 %295 = call float @llvm.AMDGPU.rsq.clamped.f32(float %294) %296 = call float @llvm.minnum.f32(float %295, float 0x47EFFFFFE0000000) %297 = fmul float %104, %296 %298 = fmul float %105, %296 %299 = fmul float %106, %296 %300 = fmul float %101, %101 %301 = fmul float %102, %102 %302 = fadd float %301, %300 %303 = fmul float %103, %103 %304 = fadd float %302, %303 %305 = call float @llvm.AMDGPU.rsq.clamped.f32(float %304) %306 = call float @llvm.minnum.f32(float %305, float 0x47EFFFFFE0000000) %307 = fmul float %101, %306 %308 = fmul float %102, %306 %309 = fmul float %103, %306 %310 = fmul float %299, %308 %311 = fmul float %297, %309 %312 = fmul float %298, %307 %313 = fmul float %298, %309 %314 = fsub float %313, %310 %315 = fmul float %299, %307 %316 = fsub float %315, %311 %317 = fmul float %297, %308 %318 = fsub float %317, %312 %319 = fmul float %297, %217 %320 = fmul float %298, %218 %321 = fadd float %320, %319 %322 = fmul float %299, %219 %323 = fadd float %321, %322 %324 = fmul float %307, %217 %325 = fmul float %308, %218 %326 = fadd float %325, %324 %327 = fmul float %309, %219 %328 = fadd float %326, %327 %329 = fmul float %314, %107 %330 = fmul float %316, %107 %331 = fmul float %318, %107 %332 = fmul float %329, %217 %333 = fmul float %330, %218 %334 = fadd float %333, %332 %335 = fmul float %331, %219 %336 = fadd float %334, %335 %337 = fmul float %336, %323 %338 = fmul float %328, %323 %339 = fmul float %336, %336 %340 = fmul float %323, %323 %341 = fmul float %328, %336 %342 = fmul float %336, 0xBFF05F8CE0000000 %343 = fmul float %323, 0x3FF05F8CE0000000 %344 = fmul float %328, 0xBFF05F8CE0000000 %345 = fmul float %328, %328 %346 = fsub float %345, %339 %347 = fmul float %340, 3.000000e+00 %348 = fadd float %347, -1.000000e+00 %349 = fmul float %341, 0x3FEB756F20000000 %350 = fmul float %337, 0xBFEB756F20000000 %351 = fmul float %348, 0x3FCFB4E7C0000000 %352 = fmul float %338, 0xBFEB756F20000000 %353 = fmul float %346, 0x3FDB756F20000000 %354 = fmul float %42, %350 %355 = fmul float %43, %351 %356 = fadd float %354, %355 %357 = fmul float %44, %352 %358 = fadd float %356, %357 %359 = fmul float %45, %353 %360 = fadd float %358, %359 %361 = fmul float %38, %342 %362 = fmul float %39, %343 %363 = fadd float %361, %362 %364 = fmul float %40, %344 %365 = fadd float %363, %364 %366 = fmul float %41, %349 %367 = fadd float %365, %366 %368 = fmul float %35, 0x3FEC5BFA00000000 %369 = fadd float %368, %367 %370 = fadd float %360, %369 %371 = fmul float %50, %350 %372 = fmul float %51, %351 %373 = fadd float %371, %372 %374 = fmul float %52, %352 %375 = fadd float %373, %374 %376 = fmul float %53, %353 %377 = fadd float %375, %376 %378 = fmul float %58, %350 %379 = fmul float %59, %351 %380 = fadd float %378, %379 %381 = fmul float %60, %352 %382 = fadd float %380, %381 %383 = fmul float %61, %353 %384 = fadd float %382, %383 %385 = fmul float %46, %342 %386 = fmul float %47, %343 %387 = fadd float %385, %386 %388 = fmul float %48, %344 %389 = fadd float %387, %388 %390 = fmul float %49, %349 %391 = fadd float %389, %390 %392 = fmul float %54, %342 %393 = fmul float %55, %343 %394 = fadd float %392, %393 %395 = fmul float %56, %344 %396 = fadd float %394, %395 %397 = fmul float %57, %349 %398 = fadd float %396, %397 %399 = fmul float %37, 0x3FEC5BFA00000000 %400 = fadd float %399, %398 %401 = fadd float %384, %400 %402 = fmul float %36, 0x3FEC5BFA00000000 %403 = fadd float %402, %391 %404 = fadd float %377, %403 %405 = call float @llvm.maxnum.f32(float %370, float 0.000000e+00) %406 = call float @llvm.maxnum.f32(float %404, float 0.000000e+00) %407 = call float @llvm.maxnum.f32(float %401, float 0.000000e+00) %408 = fmul float %285, %405 %409 = fadd float %408, %24 %410 = fmul float %287, %406 %411 = fadd float %410, %25 %412 = fmul float %289, %407 %413 = fadd float %412, %26 %414 = bitcast float %108 to i32 %415 = bitcast float %109 to i32 %416 = insertelement <2 x i32> undef, i32 %414, i32 0 %417 = insertelement <2 x i32> %416, i32 %415, i32 1 %418 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %417, <32 x i8> %97, <16 x i8> %100, i32 2) %419 = extractelement <4 x float> %418, i32 0 %420 = extractelement <4 x float> %418, i32 1 %421 = extractelement <4 x float> %418, i32 2 %422 = fmul float %419, %72 %423 = fmul float %420, %72 %424 = fmul float %421, %72 %425 = fmul float %422, %71 %426 = fmul float %423, %71 %427 = fmul float %424, %71 %428 = fmul float %425, %34 %429 = fadd float %428, %31 %430 = fmul float %426, %34 %431 = fadd float %430, %32 %432 = fmul float %427, %34 %433 = fadd float %432, %33 %434 = fmul float %110, %110 %435 = fmul float %111, %111 %436 = fadd float %435, %434 %437 = fmul float %112, %112 %438 = fadd float %436, %437 %439 = call float @llvm.AMDGPU.rsq.clamped.f32(float %438) %440 = call float @llvm.minnum.f32(float %439, float 0x47EFFFFFE0000000) %441 = fmul float %110, %440 %442 = fmul float %111, %440 %443 = fmul float %112, %440 %444 = fmul float %229, %441 %445 = fmul float %231, %442 %446 = fadd float %445, %444 %447 = fmul float %233, %443 %448 = fadd float %446, %447 %449 = call float @llvm.AMDIL.clamp.(float %448, float 0.000000e+00, float 1.000000e+00) %450 = fmul float %217, %441 %451 = fmul float %218, %442 %452 = fadd float %451, %450 %453 = fmul float %219, %443 %454 = fadd float %452, %453 %455 = call float @llvm.AMDIL.clamp.(float %454, float 0.000000e+00, float 1.000000e+00) %456 = call float @fabs(float %449) %457 = call float @llvm.pow.f32(float %456, float 1.500000e+01) %458 = fadd float %449, 0xBEB0C6F7A0000000 %459 = fmul float %457, 0x3FED48D5A0000000 %460 = fmul float %429, %459 %461 = fmul float %431, %459 %462 = fmul float %433, %459 %463 = call float @llvm.AMDGPU.cndlt(float %458, float 0.000000e+00, float %460) %464 = call float @llvm.AMDGPU.cndlt(float %458, float 0.000000e+00, float %461) %465 = call float @llvm.AMDGPU.cndlt(float %458, float 0.000000e+00, float %462) %466 = fadd float %455, 0xBEB0C6F7A0000000 %467 = call float @llvm.AMDGPU.cndlt(float %466, float 0.000000e+00, float %455) %468 = fmul float %285, %467 %469 = fadd float %468, %463 %470 = fmul float %287, %467 %471 = fadd float %470, %464 %472 = fmul float %289, %467 %473 = fadd float %472, %465 %474 = fmul float %469, %73 %475 = fadd float %474, %409 %476 = fmul float %471, %74 %477 = fadd float %476, %411 %478 = fmul float %473, %75 %479 = fadd float %478, %413 %480 = fmul float %285, %76 %481 = fadd float %480, %475 %482 = fmul float %287, %77 %483 = fadd float %482, %477 %484 = fmul float %289, %78 %485 = fadd float %484, %479 %486 = fmul float %113, 3.906250e-03 %487 = call i32 @llvm.SI.packf16(float %481, float %483) %488 = bitcast i32 %487 to float %489 = call i32 @llvm.SI.packf16(float %485, float %486) %490 = bitcast i32 %489 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %488, float %490, float %488, float %490) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 s_load_dwordx4 s[48:51], s[4:5], 0x0 ; C0980500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900 s_buffer_load_dword s1, s[8:11], 0x1 ; C2008901 s_buffer_load_dword s0, s[8:11], 0x2 ; C2000902 s_buffer_load_dword s16, s[8:11], 0x10 ; C2080910 s_buffer_load_dword s15, s[8:11], 0x11 ; C2078911 s_buffer_load_dword s13, s[8:11], 0x12 ; C2068912 s_buffer_load_dword s14, s[8:11], 0x13 ; C2070913 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_mov_b32_e32 v9, 0xb58637bd ; 7E1202FF B58637BD v_mov_b32_e32 v10, 0x7fffffff ; 7E1402FF 7FFFFFFF v_mov_b32_e32 v11, 0x3f170a3d ; 7E1602FF 3F170A3D v_mov_b32_e32 v12, 0x3de147ae ; 7E1802FF 3DE147AE v_mov_b32_e32 v13, 0x358637bd ; 7E1A02FF 358637BD s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v14, 1.0, s2 ; D208000E 000004F2 v_sub_f32_e64 v15, 1.0, s1 ; D208000F 000002F2 v_sub_f32_e64 v16, 1.0, s0 ; D2080010 000000F2 v_mul_f32_e32 v17, v5, v5 ; 10220B05 v_mad_f32 v17, v6, v6, v17 ; D2820011 04460D06 v_mad_f32 v17, v7, v7, v17 ; D2820011 04460F07 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936 s_buffer_load_dword s33, s[8:11], 0x38 ; C2108938 s_buffer_load_dword s35, s[8:11], 0x39 ; C2118939 s_buffer_load_dword s72, s[8:11], 0x3a ; C224093A s_buffer_load_dword s19, s[8:11], 0x3b ; C209893B s_buffer_load_dword s32, s[8:11], 0x3c ; C210093C s_buffer_load_dword s17, s[8:11], 0x3d ; C208893D s_buffer_load_dword s12, s[8:11], 0x3e ; C206093E s_buffer_load_dword s18, s[8:11], 0x3f ; C209093F s_buffer_load_dword s3, s[8:11], 0x40 ; C2018940 s_buffer_load_dword s73, s[8:11], 0x34 ; C2248934 s_buffer_load_dword s74, s[8:11], 0x35 ; C2250935 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v18, 1.0, s35 ; D2080012 000046F2 v_interp_p1_f32 v19, v0, 0, 2, [m0] ; C84C0800 v_interp_p2_f32 v19, [v19], v1, 0, 2, [m0] ; C84D0801 v_interp_p1_f32 v20, v0, 1, 2, [m0] ; C8500900 v_interp_p2_f32 v20, [v20], v1, 1, 2, [m0] ; C8510901 v_interp_p1_f32 v21, v0, 0, 3, [m0] ; C8540C00 v_interp_p2_f32 v21, [v21], v1, 0, 3, [m0] ; C8550C01 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[52:55], s[4:5], 0x8 ; C09A0508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C v_interp_p1_f32 v22, v0, 1, 3, [m0] ; C8580D00 v_interp_p2_f32 v22, [v22], v1, 1, 3, [m0] ; C8590D01 v_interp_p1_f32 v23, v0, 2, 3, [m0] ; C85C0E00 v_interp_p2_f32 v23, [v23], v1, 2, 3, [m0] ; C85D0E01 v_interp_p1_f32 v24, v0, 3, 4, [m0] ; C8601300 v_interp_p2_f32 v24, [v24], v1, 3, 4, [m0] ; C8611301 v_interp_p1_f32 v25, v0, 0, 5, [m0] ; C8641400 v_interp_p2_f32 v25, [v25], v1, 0, 5, [m0] ; C8651401 v_interp_p1_f32 v26, v0, 1, 5, [m0] ; C8681500 v_interp_p2_f32 v26, [v26], v1, 1, 5, [m0] ; C8691501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_load_dwordx8 s[64:71], s[6:7], 0x0 ; C0E00700 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_load_dwordx8 s[56:63], s[6:7], 0x10 ; C0DC0710 s_load_dwordx8 s[24:31], s[6:7], 0x18 ; C0CC0718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[64:71], s[48:51] ; F0800700 01901B13 v_mul_f32_e32 v1, v25, v25 ; 10023319 v_mad_f32 v1, v26, v26, v1 ; D2820001 0406351A v_mad_f32 v1, v0, v0, v1 ; D2820001 04060100 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, 2.0, v27, -1.0 ; D282001B 03CE36F4 v_mad_f32 v28, 2.0, v28, -1.0 ; D282001C 03CE38F4 v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4 v_min_f32_e32 v1, 0x7f7fffff, v1 ; 1E0202FF 7F7FFFFF v_mul_f32_e32 v30, v1, v25 ; 103C3301 v_mul_f32_e32 v31, v1, v26 ; 103E3501 v_mul_f32_e32 v32, v30, v27 ; 1040371E v_mad_f32 v32, v28, v31, v32 ; D2820020 04823F1C v_mul_f32_e32 v33, v1, v0 ; 10420101 v_mad_f32 v32, v29, v33, v32 ; D2820020 0482431D v_max_f32_e32 v32, 0, v32 ; 20404080 v_add_f32_e64 v34, 0, v32 clamp ; D2060822 00024080 v_and_b32_e32 v35, v34, v10 ; 36461522 v_log_f32_e32 v35, v35 ; 7E464F23 image_sample v[36:38], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[56:63], s[52:55] ; F0800700 01AE2413 v_mul_legacy_f32_e32 v35, s72, v35 ; 0E464648 v_add_f32_e32 v34, v9, v34 ; 06444509 v_exp_f32_e32 v35, v35 ; 7E464B23 v_cmp_gt_f32_e32 vcc, 0, v34 ; 7C084480 v_cndmask_b32_e64 v34, v35, 0, vcc ; D2000022 01A90123 v_mul_f32_e32 v35, s73, v34 ; 10464449 v_mul_f32_e32 v39, s74, v34 ; 104E444A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v40, 0x3e99999a, v36 ; 105048FF 3E99999A v_mad_f32 v40, v37, v11, v40 ; D2820028 04A21725 v_mad_f32 v40, v38, v12, v40 ; D2820028 04A21926 v_sub_f32_e64 v41, 1.0, s19 ; D2080029 000026F2 v_mul_f32_e32 v42, v36, v41 ; 10545324 v_mad_f32 v42, s19, v40, v42 ; D282002A 04AA5013 v_mul_f32_e32 v43, v37, v41 ; 10565325 v_mad_f32 v43, s19, v40, v43 ; D282002B 04AE5013 v_mul_f32_e32 v44, v38, v41 ; 10585326 v_mad_f32 v44, s19, v40, v44 ; D282002C 04B25013 v_mad_f32 v42, v41, v36, v42 ; D282002A 04AA4929 v_mad_f32 v43, v41, v37, v43 ; D282002B 04AE4B29 v_mad_f32 v41, v41, v38, v44 ; D2820029 04B24D29 v_mul_f32_e32 v44, v36, v18 ; 10582524 v_mad_f32 v44, s35, v40, v44 ; D282002C 04B25023 v_mad_f32 v35, v44, v35, -v36 ; D2820023 8492472C v_mul_f32_e32 v44, v37, v18 ; 10582525 v_mad_f32 v44, s35, v40, v44 ; D282002C 04B25023 v_mad_f32 v39, v44, v39, -v37 ; D2820027 84964F2C v_mul_f32_e32 v18, v38, v18 ; 10242526 v_mad_f32 v18, s35, v40, v18 ; D2820012 044A5023 v_mul_f32_e32 v44, s34, v34 ; 10584422 v_mad_f32 v18, v18, v44, -v38 ; D2820012 849A5912 image_sample v[44:45], 6, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[40:47], s[36:39] ; F0800600 012A2C13 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v34, v44, v34 ; 1044452C v_mad_f32 v35, v34, v35, v36 ; D2820023 04924722 v_mad_f32 v39, v34, v39, v37 ; D2820027 04964F22 v_mad_f32 v18, v34, v18, v38 ; D2820012 049A2522 v_sub_f32_e32 v32, 1.0, v32 ; 084040F2 v_mul_f32_e64 v34, |v32|, |v32| ; D2100322 00024120 v_mul_f32_e64 v34, v34, |v32| ; D2100222 00024122 v_mul_f32_e32 v34, s33, v34 ; 10444421 v_mul_f32_e32 v44, v27, v27 ; 1058371B v_mad_f32 v44, v28, v28, v44 ; D282002C 04B2391C v_mad_f32 v44, v29, v29, v44 ; D282002C 04B23B1D v_rsq_clamp_f32_e32 v44, v44 ; 7E58592C v_mul_f32_e32 v34, v34, v45 ; 10445B22 v_mul_f32_e32 v36, v34, v36 ; 10484922 v_mul_f32_e32 v37, v34, v37 ; 104A4B22 v_mul_f32_e32 v34, v34, v38 ; 10444D22 v_min_f32_e32 v38, 0x7f7fffff, v44 ; 1E4C58FF 7F7FFFFF v_mul_f32_e32 v44, v38, v27 ; 10583726 v_mul_f32_e32 v30, v30, v44 ; 103C591E v_mul_f32_e32 v46, v38, v28 ; 105C3926 v_mad_f32 v30, v46, v31, v30 ; D282001E 047A3F2E v_mul_f32_e32 v31, v38, v29 ; 103E3B26 v_mad_f32 v30, v31, v33, v30 ; D282001E 047A431F v_mul_f32_e32 v33, v44, v30 ; 10423D2C v_mad_f32 v33, v30, v44, v33 ; D2820021 0486591E v_mad_f32 v25, -v25, v1, v33 ; D2820019 24860319 v_mul_f32_e32 v33, v46, v30 ; 10423D2E v_mad_f32 v33, v30, v46, v33 ; D2820021 04865D1E v_mad_f32 v26, -v26, v1, v33 ; D282001A 2486031A v_mul_f32_e32 v33, v31, v30 ; 10423D1F v_mad_f32 v30, v30, v31, v33 ; D282001E 04863F1E v_mad_f32 v0, -v0, v1, v30 ; D2820000 247A0300 v_mul_f32_e32 v1, v27, v25 ; 1002331B v_mad_f32 v1, v26, v28, v1 ; D2820001 0406391A v_mad_f32 v1, v0, v29, v1 ; D2820001 04063B00 v_max_f32_e32 v1, 0, v1 ; 20020280 v_min_f32_e32 v1, 0x3f4ccccd, v1 ; 1E0202FF 3F4CCCCD v_mul_f32_e32 v1, s32, v1 ; 10020220 v_mul_f32_e32 v1, v45, v1 ; 1002032D v_mad_f32 v27, s19, v40, v42 ; D282001B 04AA5013 v_mad_f32 v28, s19, v40, v43 ; D282001C 04AE5013 v_mad_f32 v29, s19, v40, v41 ; D282001D 04A65013 v_max_f32_e64 v27, |v27|, v13 ; D220011B 00021B1B v_max_f32_e64 v28, |v28|, v13 ; D220011C 00021B1C v_max_f32_e64 v13, |v29|, v13 ; D220010D 00021B1D v_mad_f32 v27, v27, v27, -v35 ; D282001B 848E371B v_mad_f32 v27, v1, v27, v35 ; D282001B 048E3701 v_mad_f32 v28, v28, v28, -v39 ; D282001C 849E391C v_mad_f32 v28, v1, v28, v39 ; D282001C 049E3901 v_mad_f32 v13, v13, v13, -v18 ; D282000D 844A1B0D v_mad_f32 v1, v1, v13, v18 ; D2820001 044A1B01 image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[24:31], s[20:23] ; F0800700 00A61213 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, s18, v18 ; 101A2412 v_mul_f32_e32 v18, s18, v19 ; 10242612 v_mul_f32_e32 v19, s18, v20 ; 10262812 v_add_f32_e64 v20, |v32|, v9 ; D2060114 00021320 v_cmp_gt_f32_e32 vcc, 0, v20 ; 7C082880 v_cndmask_b32_e64 v20, v36, 0, vcc ; D2000014 01A90124 v_add_f32_e32 v20, v20, v27 ; 06283714 v_cndmask_b32_e64 v27, v37, 0, vcc ; D200001B 01A90125 v_add_f32_e32 v27, v27, v28 ; 0636391B v_cndmask_b32_e64 v28, v34, 0, vcc ; D200001C 01A90122 v_add_f32_e32 v1, v28, v1 ; 0602031C v_mul_f32_e32 v28, 0x3e99999a, v20 ; 103828FF 3E99999A v_mad_f32 v11, v27, v11, v28 ; D282000B 0472171B v_mad_f32 v11, v1, v12, v11 ; D282000B 042E1901 v_sub_f32_e64 v12, 1.0, s17 ; D208000C 000022F2 v_mul_f32_e32 v20, v20, v12 ; 10281914 v_mul_f32_e32 v27, v27, v12 ; 1036191B v_mul_f32_e32 v1, v1, v12 ; 10021901 v_mad_f32 v12, s17, v11, v20 ; D282000C 04521611 v_mad_f32 v20, s17, v11, v27 ; D2820014 046E1611 v_mad_f32 v1, s17, v11, v1 ; D2820001 04061611 v_mul_f32_e32 v11, s12, v12 ; 1016180C v_mul_f32_e32 v11, v14, v11 ; 1016170E v_mul_f32_e32 v12, s12, v20 ; 1018280C v_mul_f32_e32 v12, v15, v12 ; 1018190F v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mul_f32_e32 v1, v16, v1 ; 10020310 s_buffer_load_dword s4, s[8:11], 0x14 ; C2020914 s_buffer_load_dword s5, s[8:11], 0x15 ; C2028915 s_buffer_load_dword s6, s[8:11], 0x16 ; C2030916 v_mov_b32_e32 v14, s16 ; 7E1C0210 v_mad_f32 v11, s14, v11, v14 ; D282000B 043A160E v_mov_b32_e32 v14, s15 ; 7E1C020F v_mad_f32 v12, s14, v12, v14 ; D282000C 043A180E v_mov_b32_e32 v14, s13 ; 7E1C020D v_mad_f32 v1, s14, v1, v14 ; D2820001 043A020E v_min_f32_e32 v14, 0x7f7fffff, v17 ; 1E1C22FF 7F7FFFFF v_mul_f32_e32 v15, v2, v2 ; 101E0502 v_mad_f32 v15, v3, v3, v15 ; D282000F 043E0703 v_mad_f32 v15, v4, v4, v15 ; D282000F 043E0904 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v5, v14, v5 ; 100A0B0E v_mul_f32_e32 v6, v14, v6 ; 100C0D0E v_mul_f32_e32 v7, v14, v7 ; 100E0F0E v_min_f32_e32 v14, 0x7f7fffff, v15 ; 1E1C1EFF 7F7FFFFF v_mul_f32_e32 v2, v14, v2 ; 1004050E v_mul_f32_e32 v3, v14, v3 ; 1006070E v_mul_f32_e32 v4, v14, v4 ; 1008090E v_mul_f32_e32 v14, v3, v7 ; 101C0F03 v_mad_f32 v14, v6, v4, -v14 ; D282000E 843A0906 v_mul_f32_e32 v15, v44, v5 ; 101E0B2C v_mad_f32 v15, v6, v46, v15 ; D282000F 043E5D06 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v5, v3, -v6 ; D2820006 841A0705 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 v_mad_f32 v5, v7, v2, -v5 ; D2820005 84160507 v_mad_f32 v7, v7, v31, v15 ; D2820007 043E3F07 v_mul_f32_e32 v2, v44, v2 ; 1004052C v_mad_f32 v2, v3, v46, v2 ; D2820002 040A5D03 v_mad_f32 v2, v4, v31, v2 ; D2820002 040A3F04 v_mul_f32_e32 v3, v8, v14 ; 10061D08 v_mul_f32_e32 v4, v8, v5 ; 10080B08 v_mul_f32_e32 v5, v8, v6 ; 100A0D08 v_mul_f32_e32 v3, v44, v3 ; 1006072C v_mad_f32 v3, v4, v46, v3 ; D2820003 040E5D04 v_mad_f32 v3, v5, v31, v3 ; D2820003 040E3F05 v_mul_f32_e32 v4, v7, v7 ; 10080F07 v_mov_b32_e32 v5, 0x40400000 ; 7E0A02FF 40400000 v_mad_f32 v4, v5, v4, -1.0 ; D2820004 03CE0905 s_buffer_load_dword s7, s[8:11], 0x20 ; C2038920 s_buffer_load_dword s13, s[8:11], 0x21 ; C2068921 v_mul_f32_e32 v5, v7, v3 ; 100A0707 v_mov_b32_e32 v6, 0xbf5bab79 ; 7E0C02FF BF5BAB79 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v8, v7, v2 ; 10100507 v_mul_f32_e32 v6, v6, v8 ; 100C1106 s_buffer_load_dword s14, s[8:11], 0x29 ; C2070929 s_buffer_load_dword s15, s[8:11], 0x28 ; C2078928 v_mul_f32_e32 v4, 0x3e7da73e, v4 ; 100808FF 3E7DA73E s_buffer_load_dword s16, s[8:11], 0x30 ; C2080930 s_buffer_load_dword s17, s[8:11], 0x31 ; C2088931 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s13, v4 ; 1010080D v_mad_f32 v8, s7, v5, v8 ; D2820008 04220A07 s_buffer_load_dword s7, s[8:11], 0x22 ; C2038922 s_buffer_load_dword s13, s[8:11], 0x2a ; C206892A s_buffer_load_dword s18, s[8:11], 0x2b ; C209092B v_mul_f32_e32 v14, s14, v4 ; 101C080E v_mad_f32 v14, s15, v5, v14 ; D282000E 043A0A0F s_buffer_load_dword s14, s[8:11], 0x32 ; C2070932 s_buffer_load_dword s15, s[8:11], 0x33 ; C2078933 v_mul_f32_e32 v4, s17, v4 ; 10080811 v_mad_f32 v4, s16, v5, v4 ; D2820004 04120A10 s_buffer_load_dword s16, s[8:11], 0x23 ; C2080923 s_buffer_load_dword s17, s[8:11], 0x24 ; C2088924 s_buffer_load_dword s19, s[8:11], 0x25 ; C2098925 s_buffer_load_dword s20, s[8:11], 0x26 ; C20A0926 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s7, v6, v8 ; D2820005 04220C07 v_mad_f32 v8, s13, v6, v14 ; D2820008 043A0C0D v_mad_f32 v4, s14, v6, v4 ; D2820004 04120C0E v_mul_f32_e32 v6, v3, v3 ; 100C0703 v_mad_f32 v6, v2, v2, -v6 ; D2820006 841A0502 v_mul_f32_e32 v6, 0x3edbab79, v6 ; 100C0CFF 3EDBAB79 s_buffer_load_dword s7, s[8:11], 0x1d ; C203891D v_mad_f32 v5, s16, v6, v5 ; D2820005 04160C10 v_mad_f32 v8, s18, v6, v8 ; D2820008 04220C12 s_buffer_load_dword s13, s[8:11], 0x1c ; C206891C v_mad_f32 v4, s15, v6, v4 ; D2820004 04120C0F v_mul_f32_e32 v6, 0x3f82fc67, v7 ; 100C0EFF 3F82FC67 s_buffer_load_dword s14, s[8:11], 0x1e ; C207091E s_buffer_load_dword s15, s[8:11], 0x1f ; C207891F s_buffer_load_dword s16, s[8:11], 0x2d ; C208092D s_buffer_load_dword s18, s[8:11], 0x2c ; C209092C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s7, v6 ; 100E0C07 v_mov_b32_e32 v14, 0xbf82fc67 ; 7E1C02FF BF82FC67 v_mul_f32_e32 v15, v14, v3 ; 101E070E v_mad_f32 v7, s13, v15, v7 ; D2820007 041E1E0D s_buffer_load_dword s7, s[8:11], 0x2e ; C203892E v_mul_f32_e32 v16, s19, v6 ; 10200C13 v_mad_f32 v16, s17, v15, v16 ; D2820010 04421E11 s_buffer_load_dword s13, s[8:11], 0x2f ; C206892F v_mul_f32_e32 v6, s16, v6 ; 100C0C10 v_mad_f32 v6, s18, v15, v6 ; D2820006 041A1E12 v_mul_f32_e32 v14, v14, v2 ; 101C050E v_mad_f32 v7, s14, v14, v7 ; D2820007 041E1C0E s_buffer_load_dword s14, s[8:11], 0x27 ; C2070927 v_mad_f32 v15, s20, v14, v16 ; D282000F 04421C14 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s7, v14, v6 ; D2820006 041A1C07 s_buffer_load_dword s7, s[8:11], 0x18 ; C2038918 s_buffer_load_dword s16, s[8:11], 0x1a ; C208091A s_buffer_load_dword s17, s[8:11], 0x19 ; C2088919 v_mul_f32_e32 v2, v3, v2 ; 10040503 s_buffer_load_dword s18, s[8:11], 0x17 ; C2090917 v_mul_f32_e32 v2, 0x3f5bab79, v2 ; 100404FF 3F5BAB79 v_mad_f32 v3, s15, v2, v7 ; D2820003 041E040F v_mad_f32 v7, s14, v2, v15 ; D2820007 043E040E v_mad_f32 v2, s13, v2, v6 ; D2820002 041A040D v_mov_b32_e32 v6, 0x3f62dfd0 ; 7E0C02FF 3F62DFD0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s7, v6, v3 ; D2820003 040E0C07 v_mad_f32 v2, s16, v6, v2 ; D2820002 040A0C10 v_mad_f32 v6, s17, v6, v7 ; D2820006 041E0C11 v_mul_f32_e32 v7, s12, v13 ; 100E1A0C v_mov_b32_e32 v13, s4 ; 7E1A0204 v_mad_f32 v7, s18, v7, v13 ; D2820007 04360E12 v_mul_f32_e32 v13, s12, v18 ; 101A240C v_mul_f32_e32 v14, v21, v21 ; 101C2B15 v_mad_f32 v14, v22, v22, v14 ; D282000E 043A2D16 v_mad_f32 v14, v23, v23, v14 ; D282000E 043A2F17 v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E v_mov_b32_e32 v15, s5 ; 7E1E0205 v_mad_f32 v13, s18, v13, v15 ; D282000D 043E1A12 v_mul_f32_e32 v15, s12, v19 ; 101E260C v_mov_b32_e32 v16, s6 ; 7E200206 v_mad_f32 v15, s18, v15, v16 ; D282000F 04421E12 v_min_f32_e32 v14, 0x7f7fffff, v14 ; 1E1C1CFF 7F7FFFFF v_mul_f32_e32 v16, v14, v21 ; 10202B0E v_mul_f32_e32 v17, v14, v22 ; 10222D0E v_mul_f32_e32 v14, v14, v23 ; 101C2F0E v_mul_f32_e32 v18, v16, v44 ; 10245910 v_mad_f32 v18, v46, v17, v18 ; D2820012 044A232E v_mad_f32 v18, v31, v14, v18 ; D2820012 044A1D1F v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_add_f32_e32 v19, v9, v18 ; 06262509 v_cmp_gt_f32_e32 vcc, 0, v19 ; 7C082680 s_buffer_load_dword s4, s[8:11], 0x41 ; C2020941 s_buffer_load_dword s5, s[8:11], 0x42 ; C2028942 s_buffer_load_dword s6, s[8:11], 0x44 ; C2030944 s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945 s_buffer_load_dword s8, s[8:11], 0x46 ; C2040946 v_cndmask_b32_e64 v18, v18, 0, vcc ; D2000012 01A90112 v_mul_f32_e32 v16, v16, v25 ; 10203310 v_mad_f32 v16, v26, v17, v16 ; D2820010 0442231A v_mad_f32 v0, v0, v14, v16 ; D2820000 04421D00 v_add_f32_e32 v3, v3, v5 ; 06060B03 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v5, v0, v10 ; 360A1500 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_max_f32_e32 v3, 0, v3 ; 20060680 v_mad_f32 v3, v11, v3, s2 ; D2820003 000A070B v_add_f32_e32 v0, v9, v0 ; 06000109 v_mul_legacy_f32_e32 v5, 0x41700000, v5 ; 0E0A0AFF 41700000 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mul_f32_e32 v5, 0x3f6a46ad, v5 ; 100A0AFF 3F6A46AD v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v7, 0, vcc ; D2000000 01A90107 v_mad_f32 v0, v11, v18, v0 ; D2820000 0402250B v_mad_f32 v0, v0, s3, v3 ; D2820000 040C0700 v_mul_f32_e32 v3, v5, v13 ; 10061B05 v_mul_f32_e32 v5, v5, v15 ; 100A1F05 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_cndmask_b32_e64 v5, v5, 0, vcc ; D2000005 01A90105 v_add_f32_e32 v2, v2, v4 ; 06040902 v_add_f32_e32 v4, v6, v8 ; 06081106 v_mad_f32 v3, v12, v18, v3 ; D2820003 040E250C v_mad_f32 v5, v1, v18, v5 ; D2820005 04162501 v_max_f32_e32 v4, 0, v4 ; 20080880 v_mad_f32 v4, v12, v4, s1 ; D2820004 0006090C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v3, s4, v4 ; D2820003 04100903 v_max_f32_e32 v2, 0, v2 ; 20040480 v_mad_f32 v2, v1, v2, s0 ; D2820002 00020501 v_mad_f32 v2, v5, s5, v2 ; D2820002 04080B05 v_mad_f32 v0, v11, s6, v0 ; D2820000 04000D0B v_mad_f32 v3, v12, s7, v3 ; D2820003 040C0F0C v_mad_f32 v1, v1, s8, v2 ; D2820001 04081101 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 v_mul_f32_e32 v2, 0x3b800000, v24 ; 100430FF 3B800000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 48 Code Size: 2252 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[4], PERSPECTIVE DCL IN[4], TEXCOORD[5], PERSPECTIVE DCL IN[5], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..17] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..6] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, -0.0000} IMM[1] FLT32 { 0.3000, 0.5900, 0.1100, 0.0000} IMM[2] FLT32 { 0.8862, 15.0000, 0.9151, 0.0039} IMM[3] FLT32 { 3.0000, -1.0000, -1.0233, 1.0233} IMM[4] FLT32 { 0.8581, -0.8581, 0.2477, 0.4290} IMM[5] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[2], SAMP[0], 2D 1: MAD TEMP[0].xyz, TEMP[0], IMM[0].xxxx, IMM[0].yyyy 2: DP3 TEMP[1].x, IN[5], IN[5] 3: RSQ TEMP[1].x, TEMP[1].xxxx 4: MIN TEMP[1].x, IMM[5].xxxx, TEMP[1].xxxx 5: MUL TEMP[2].xyz, IN[5], TEMP[1].xxxx 6: DP3_SAT TEMP[0].w, TEMP[0], TEMP[2] 7: DP3 TEMP[1].x, TEMP[0], TEMP[0] 8: RSQ TEMP[1].x, TEMP[1].xxxx 9: MIN TEMP[1].x, IMM[5].xxxx, TEMP[1].xxxx 10: MUL TEMP[3].xyz, TEMP[0], TEMP[1].xxxx 11: ADD TEMP[0].x, TEMP[0].wwww, IMM[0].wwww 12: POW TEMP[2].w, |TEMP[0].wwww|, CONST[14].yyyy 13: CMP TEMP[0].x, TEMP[0].xxxx, IMM[1].wwww, TEMP[2].wwww 14: MUL TEMP[0].yzw, TEMP[0].xxxx, CONST[13].xxyz 15: TEX TEMP[4], IN[2], SAMP[1], 2D 16: DP3 TEMP[2].w, TEMP[4], IMM[1] 17: LRP TEMP[5].xyz, CONST[14].xxxx, TEMP[2].wwww, TEMP[4] 18: MAD TEMP[0].yzw, TEMP[5].xxyz, TEMP[0], -TEMP[4].xxyz 19: TEX TEMP[5], IN[2], SAMP[2], 2D 20: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].yyyy 21: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[0].yzww, TEMP[4] 22: DP3 TEMP[0].w, TEMP[0], IMM[1] 23: LRP TEMP[4].xyz, CONST[14].zzzz, TEMP[0].wwww, TEMP[0] 24: MUL TEMP[0].xyz, TEMP[4], CONST[14].wwww 25: MOV TEMP[4].z, IMM[0].zzzz 26: ADD TEMP[4].xyz, TEMP[4].zzzz, -CONST[0] 27: MUL TEMP[0].xyz, TEMP[0], TEMP[4] 28: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 29: TEX TEMP[4], IN[2], SAMP[3], 2D 30: MUL TEMP[4].xyz, TEMP[4], CONST[15].xxxx 31: MUL TEMP[4].xyz, TEMP[4], CONST[14].wwww 32: MAD TEMP[4].xyz, TEMP[4], CONST[5].wwww, CONST[5] 33: DP3 TEMP[0].w, TEMP[3], TEMP[2] 34: MUL TEMP[5].xyz, TEMP[0].wwww, TEMP[3] 35: MAD TEMP[2].xyz, TEMP[5], IMM[0].xxxx, -TEMP[2] 36: DP3 TEMP[1].x, IN[3], IN[3] 37: RSQ TEMP[1].x, TEMP[1].xxxx 38: MIN TEMP[1].x, IMM[5].xxxx, TEMP[1].xxxx 39: MUL TEMP[5].xyz, IN[3], TEMP[1].xxxx 40: DP3_SAT TEMP[0].w, TEMP[2], TEMP[5] 41: DP3_SAT TEMP[2].x, TEMP[3], TEMP[5] 42: POW TEMP[2].y, |TEMP[0].wwww|, IMM[2].yyyy 43: ADD TEMP[0].w, TEMP[0].wwww, IMM[0].wwww 44: MUL TEMP[2].y, TEMP[2].yyyy, IMM[2].zzzz 45: MUL TEMP[2].yzw, TEMP[4].xxyz, TEMP[2].yyyy 46: CMP TEMP[2].yzw, TEMP[0].wwww, IMM[1].wwww, TEMP[2] 47: ADD TEMP[0].w, TEMP[2].xxxx, IMM[0].wwww 48: CMP TEMP[0].w, TEMP[0].wwww, IMM[1].wwww, TEMP[2].xxxx 49: MAD TEMP[2].xyz, TEMP[0], TEMP[0].wwww, TEMP[2].yzww 50: DP3 TEMP[1].x, IN[1], IN[1] 51: RSQ TEMP[1].x, TEMP[1].xxxx 52: MIN TEMP[1].x, IMM[5].xxxx, TEMP[1].xxxx 53: MUL TEMP[4].xyz, IN[1], TEMP[1].xxxx 54: DP3 TEMP[1].x, IN[0], IN[0] 55: RSQ TEMP[1].x, TEMP[1].xxxx 56: MIN TEMP[1].x, IMM[5].xxxx, TEMP[1].xxxx 57: MUL TEMP[5].xyz, IN[0], TEMP[1].xxxx 58: MUL TEMP[6].xyz, TEMP[4].zxyw, TEMP[5].yzxw 59: MAD TEMP[6].xyz, TEMP[4].yzxw, TEMP[5].zxyw, -TEMP[6] 60: DP3 TEMP[4].y, TEMP[4], TEMP[3] 61: DP3 TEMP[4].z, TEMP[5], TEMP[3] 62: MUL TEMP[5].xyz, TEMP[6], IN[1].wwww 63: DP3 TEMP[4].x, TEMP[5], TEMP[3] 64: MUL TEMP[3].xz, TEMP[4], TEMP[4].yyyy 65: MUL TEMP[5].xyz, TEMP[4], TEMP[4].xyxw 66: MUL TEMP[6].xyz, TEMP[4], IMM[3].zwzw 67: MAD TEMP[3].w, TEMP[4].zzzz, TEMP[4].zzzz, -TEMP[5].xxxx 68: MAD TEMP[3].y, TEMP[5].yyyy, IMM[3].xxxx, IMM[3].yyyy 69: MUL TEMP[6].w, TEMP[5].zzzz, IMM[4].xxxx 70: MUL TEMP[3], TEMP[3], IMM[4].yzyw 71: DP4 TEMP[0].w, CONST[8], TEMP[3] 72: DP4 TEMP[2].w, CONST[7], TEMP[6] 73: MOV TEMP[4].x, IMM[2].xxxx 74: MAD TEMP[2].w, CONST[6].xxxx, TEMP[4].xxxx, TEMP[2].wwww 75: ADD TEMP[5].x, TEMP[0].wwww, TEMP[2].wwww 76: DP4 TEMP[0].w, CONST[10], TEMP[3] 77: DP4 TEMP[2].w, CONST[12], TEMP[3] 78: DP4 TEMP[3].x, CONST[9], TEMP[6] 79: DP4 TEMP[3].y, CONST[11], TEMP[6] 80: MAD TEMP[3].y, CONST[6].zzzz, TEMP[4].xxxx, TEMP[3].yyyy 81: ADD TEMP[5].z, TEMP[2].wwww, TEMP[3].yyyy 82: MAD TEMP[2].w, CONST[6].yyyy, TEMP[4].xxxx, TEMP[3].xxxx 83: ADD TEMP[5].y, TEMP[0].wwww, TEMP[2].wwww 84: MAX TEMP[3].xyz, TEMP[5], IMM[1].wwww 85: MAD TEMP[3].xyz, TEMP[0], TEMP[3], CONST[0] 86: MAD TEMP[2].xyz, TEMP[2], CONST[16], TEMP[3] 87: MAD OUT[0].xyz, TEMP[0], CONST[17], TEMP[2] 88: MUL OUT[0].w, IMM[2].wwww, IN[4].wwww 89: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %76 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %77 = load <32 x i8>, <32 x i8> addrspace(2)* %76, align 32, !tbaa !0 %78 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !tbaa !0 %80 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %81 = bitcast <8 x i32> addrspace(2)* %80 to <32 x i8> addrspace(2)* %82 = load <32 x i8>, <32 x i8> addrspace(2)* %81, align 32, !tbaa !0 %83 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %84 = bitcast <4 x i32> addrspace(2)* %83 to <16 x i8> addrspace(2)* %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0 %86 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %87 = bitcast <8 x i32> addrspace(2)* %86 to <32 x i8> addrspace(2)* %88 = load <32 x i8>, <32 x i8> addrspace(2)* %87, align 32, !tbaa !0 %89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %90 = bitcast <4 x i32> addrspace(2)* %89 to <16 x i8> addrspace(2)* %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %93 = bitcast <8 x i32> addrspace(2)* %92 to <32 x i8> addrspace(2)* %94 = load <32 x i8>, <32 x i8> addrspace(2)* %93, align 32, !tbaa !0 %95 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %96 = bitcast <4 x i32> addrspace(2)* %95 to <16 x i8> addrspace(2)* %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %114 = bitcast float %105 to i32 %115 = bitcast float %106 to i32 %116 = insertelement <2 x i32> undef, i32 %114, i32 0 %117 = insertelement <2 x i32> %116, i32 %115, i32 1 %118 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %117, <32 x i8> %77, <16 x i8> %79, i32 2) %119 = extractelement <4 x float> %118, i32 0 %120 = extractelement <4 x float> %118, i32 1 %121 = extractelement <4 x float> %118, i32 2 %122 = fmul float %119, 2.000000e+00 %123 = fadd float %122, -1.000000e+00 %124 = fmul float %120, 2.000000e+00 %125 = fadd float %124, -1.000000e+00 %126 = fmul float %121, 2.000000e+00 %127 = fadd float %126, -1.000000e+00 %128 = fmul float %111, %111 %129 = fmul float %112, %112 %130 = fadd float %129, %128 %131 = fmul float %113, %113 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = call float @llvm.minnum.f32(float %133, float 0x47EFFFFFE0000000) %135 = fmul float %111, %134 %136 = fmul float %112, %134 %137 = fmul float %113, %134 %138 = fmul float %123, %135 %139 = fmul float %125, %136 %140 = fadd float %139, %138 %141 = fmul float %127, %137 %142 = fadd float %140, %141 %143 = call float @llvm.AMDIL.clamp.(float %142, float 0.000000e+00, float 1.000000e+00) %144 = fmul float %123, %123 %145 = fmul float %125, %125 %146 = fadd float %145, %144 %147 = fmul float %127, %127 %148 = fadd float %146, %147 %149 = call float @llvm.AMDGPU.rsq.clamped.f32(float %148) %150 = call float @llvm.minnum.f32(float %149, float 0x47EFFFFFE0000000) %151 = fmul float %123, %150 %152 = fmul float %125, %150 %153 = fmul float %127, %150 %154 = fadd float %143, 0xBEB0C6F7A0000000 %155 = call float @fabs(float %143) %156 = call float @llvm.pow.f32(float %155, float %66) %157 = call float @llvm.AMDGPU.cndlt(float %154, float 0.000000e+00, float %156) %158 = fmul float %157, %62 %159 = fmul float %157, %63 %160 = fmul float %157, %64 %161 = bitcast float %105 to i32 %162 = bitcast float %106 to i32 %163 = insertelement <2 x i32> undef, i32 %161, i32 0 %164 = insertelement <2 x i32> %163, i32 %162, i32 1 %165 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %164, <32 x i8> %82, <16 x i8> %85, i32 2) %166 = extractelement <4 x float> %165, i32 0 %167 = extractelement <4 x float> %165, i32 1 %168 = extractelement <4 x float> %165, i32 2 %169 = fmul float %166, 0x3FD3333340000000 %170 = fmul float %167, 0x3FE2E147A0000000 %171 = fadd float %170, %169 %172 = fmul float %168, 0x3FBC28F5C0000000 %173 = fadd float %171, %172 %174 = call float @llvm.AMDGPU.lrp(float %65, float %173, float %166) %175 = call float @llvm.AMDGPU.lrp(float %65, float %173, float %167) %176 = call float @llvm.AMDGPU.lrp(float %65, float %173, float %168) %177 = fmul float %174, %158 %178 = fsub float %177, %166 %179 = fmul float %175, %159 %180 = fsub float %179, %167 %181 = fmul float %176, %160 %182 = fsub float %181, %168 %183 = bitcast float %105 to i32 %184 = bitcast float %106 to i32 %185 = insertelement <2 x i32> undef, i32 %183, i32 0 %186 = insertelement <2 x i32> %185, i32 %184, i32 1 %187 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %186, <32 x i8> %88, <16 x i8> %91, i32 2) %188 = extractelement <4 x float> %187, i32 1 %189 = fmul float %157, %188 %190 = fmul float %189, %178 %191 = fadd float %190, %166 %192 = fmul float %189, %180 %193 = fadd float %192, %167 %194 = fmul float %189, %182 %195 = fadd float %194, %168 %196 = fmul float %191, 0x3FD3333340000000 %197 = fmul float %193, 0x3FE2E147A0000000 %198 = fadd float %197, %196 %199 = fmul float %195, 0x3FBC28F5C0000000 %200 = fadd float %198, %199 %201 = call float @llvm.AMDGPU.lrp(float %67, float %200, float %191) %202 = call float @llvm.AMDGPU.lrp(float %67, float %200, float %193) %203 = call float @llvm.AMDGPU.lrp(float %67, float %200, float %195) %204 = fmul float %201, %68 %205 = fmul float %202, %68 %206 = fmul float %203, %68 %207 = fsub float 1.000000e+00, %24 %208 = fsub float 1.000000e+00, %25 %209 = fsub float 1.000000e+00, %26 %210 = fmul float %204, %207 %211 = fmul float %205, %208 %212 = fmul float %206, %209 %213 = fmul float %210, %30 %214 = fadd float %213, %27 %215 = fmul float %211, %30 %216 = fadd float %215, %28 %217 = fmul float %212, %30 %218 = fadd float %217, %29 %219 = bitcast float %105 to i32 %220 = bitcast float %106 to i32 %221 = insertelement <2 x i32> undef, i32 %219, i32 0 %222 = insertelement <2 x i32> %221, i32 %220, i32 1 %223 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %222, <32 x i8> %94, <16 x i8> %97, i32 2) %224 = extractelement <4 x float> %223, i32 0 %225 = extractelement <4 x float> %223, i32 1 %226 = extractelement <4 x float> %223, i32 2 %227 = fmul float %224, %69 %228 = fmul float %225, %69 %229 = fmul float %226, %69 %230 = fmul float %227, %68 %231 = fmul float %228, %68 %232 = fmul float %229, %68 %233 = fmul float %230, %34 %234 = fadd float %233, %31 %235 = fmul float %231, %34 %236 = fadd float %235, %32 %237 = fmul float %232, %34 %238 = fadd float %237, %33 %239 = fmul float %151, %135 %240 = fmul float %152, %136 %241 = fadd float %240, %239 %242 = fmul float %153, %137 %243 = fadd float %241, %242 %244 = fmul float %243, %151 %245 = fmul float %243, %152 %246 = fmul float %243, %153 %247 = fmul float %244, 2.000000e+00 %248 = fsub float %247, %135 %249 = fmul float %245, 2.000000e+00 %250 = fsub float %249, %136 %251 = fmul float %246, 2.000000e+00 %252 = fsub float %251, %137 %253 = fmul float %107, %107 %254 = fmul float %108, %108 %255 = fadd float %254, %253 %256 = fmul float %109, %109 %257 = fadd float %255, %256 %258 = call float @llvm.AMDGPU.rsq.clamped.f32(float %257) %259 = call float @llvm.minnum.f32(float %258, float 0x47EFFFFFE0000000) %260 = fmul float %107, %259 %261 = fmul float %108, %259 %262 = fmul float %109, %259 %263 = fmul float %248, %260 %264 = fmul float %250, %261 %265 = fadd float %264, %263 %266 = fmul float %252, %262 %267 = fadd float %265, %266 %268 = call float @llvm.AMDIL.clamp.(float %267, float 0.000000e+00, float 1.000000e+00) %269 = fmul float %151, %260 %270 = fmul float %152, %261 %271 = fadd float %270, %269 %272 = fmul float %153, %262 %273 = fadd float %271, %272 %274 = call float @llvm.AMDIL.clamp.(float %273, float 0.000000e+00, float 1.000000e+00) %275 = call float @fabs(float %268) %276 = call float @llvm.pow.f32(float %275, float 1.500000e+01) %277 = fadd float %268, 0xBEB0C6F7A0000000 %278 = fmul float %276, 0x3FED48D5A0000000 %279 = fmul float %234, %278 %280 = fmul float %236, %278 %281 = fmul float %238, %278 %282 = call float @llvm.AMDGPU.cndlt(float %277, float 0.000000e+00, float %279) %283 = call float @llvm.AMDGPU.cndlt(float %277, float 0.000000e+00, float %280) %284 = call float @llvm.AMDGPU.cndlt(float %277, float 0.000000e+00, float %281) %285 = fadd float %274, 0xBEB0C6F7A0000000 %286 = call float @llvm.AMDGPU.cndlt(float %285, float 0.000000e+00, float %274) %287 = fmul float %214, %286 %288 = fadd float %287, %282 %289 = fmul float %216, %286 %290 = fadd float %289, %283 %291 = fmul float %218, %286 %292 = fadd float %291, %284 %293 = fmul float %101, %101 %294 = fmul float %102, %102 %295 = fadd float %294, %293 %296 = fmul float %103, %103 %297 = fadd float %295, %296 %298 = call float @llvm.AMDGPU.rsq.clamped.f32(float %297) %299 = call float @llvm.minnum.f32(float %298, float 0x47EFFFFFE0000000) %300 = fmul float %101, %299 %301 = fmul float %102, %299 %302 = fmul float %103, %299 %303 = fmul float %98, %98 %304 = fmul float %99, %99 %305 = fadd float %304, %303 %306 = fmul float %100, %100 %307 = fadd float %305, %306 %308 = call float @llvm.AMDGPU.rsq.clamped.f32(float %307) %309 = call float @llvm.minnum.f32(float %308, float 0x47EFFFFFE0000000) %310 = fmul float %98, %309 %311 = fmul float %99, %309 %312 = fmul float %100, %309 %313 = fmul float %302, %311 %314 = fmul float %300, %312 %315 = fmul float %301, %310 %316 = fmul float %301, %312 %317 = fsub float %316, %313 %318 = fmul float %302, %310 %319 = fsub float %318, %314 %320 = fmul float %300, %311 %321 = fsub float %320, %315 %322 = fmul float %300, %151 %323 = fmul float %301, %152 %324 = fadd float %323, %322 %325 = fmul float %302, %153 %326 = fadd float %324, %325 %327 = fmul float %310, %151 %328 = fmul float %311, %152 %329 = fadd float %328, %327 %330 = fmul float %312, %153 %331 = fadd float %329, %330 %332 = fmul float %317, %104 %333 = fmul float %319, %104 %334 = fmul float %321, %104 %335 = fmul float %332, %151 %336 = fmul float %333, %152 %337 = fadd float %336, %335 %338 = fmul float %334, %153 %339 = fadd float %337, %338 %340 = fmul float %339, %326 %341 = fmul float %331, %326 %342 = fmul float %339, %339 %343 = fmul float %326, %326 %344 = fmul float %331, %339 %345 = fmul float %339, 0xBFF05F8CE0000000 %346 = fmul float %326, 0x3FF05F8CE0000000 %347 = fmul float %331, 0xBFF05F8CE0000000 %348 = fmul float %331, %331 %349 = fsub float %348, %342 %350 = fmul float %343, 3.000000e+00 %351 = fadd float %350, -1.000000e+00 %352 = fmul float %344, 0x3FEB756F20000000 %353 = fmul float %340, 0xBFEB756F20000000 %354 = fmul float %351, 0x3FCFB4E7C0000000 %355 = fmul float %341, 0xBFEB756F20000000 %356 = fmul float %349, 0x3FDB756F20000000 %357 = fmul float %42, %353 %358 = fmul float %43, %354 %359 = fadd float %357, %358 %360 = fmul float %44, %355 %361 = fadd float %359, %360 %362 = fmul float %45, %356 %363 = fadd float %361, %362 %364 = fmul float %38, %345 %365 = fmul float %39, %346 %366 = fadd float %364, %365 %367 = fmul float %40, %347 %368 = fadd float %366, %367 %369 = fmul float %41, %352 %370 = fadd float %368, %369 %371 = fmul float %35, 0x3FEC5BFA00000000 %372 = fadd float %371, %370 %373 = fadd float %363, %372 %374 = fmul float %50, %353 %375 = fmul float %51, %354 %376 = fadd float %374, %375 %377 = fmul float %52, %355 %378 = fadd float %376, %377 %379 = fmul float %53, %356 %380 = fadd float %378, %379 %381 = fmul float %58, %353 %382 = fmul float %59, %354 %383 = fadd float %381, %382 %384 = fmul float %60, %355 %385 = fadd float %383, %384 %386 = fmul float %61, %356 %387 = fadd float %385, %386 %388 = fmul float %46, %345 %389 = fmul float %47, %346 %390 = fadd float %388, %389 %391 = fmul float %48, %347 %392 = fadd float %390, %391 %393 = fmul float %49, %352 %394 = fadd float %392, %393 %395 = fmul float %54, %345 %396 = fmul float %55, %346 %397 = fadd float %395, %396 %398 = fmul float %56, %347 %399 = fadd float %397, %398 %400 = fmul float %57, %352 %401 = fadd float %399, %400 %402 = fmul float %37, 0x3FEC5BFA00000000 %403 = fadd float %402, %401 %404 = fadd float %387, %403 %405 = fmul float %36, 0x3FEC5BFA00000000 %406 = fadd float %405, %394 %407 = fadd float %380, %406 %408 = call float @llvm.maxnum.f32(float %373, float 0.000000e+00) %409 = call float @llvm.maxnum.f32(float %407, float 0.000000e+00) %410 = call float @llvm.maxnum.f32(float %404, float 0.000000e+00) %411 = fmul float %214, %408 %412 = fadd float %411, %24 %413 = fmul float %216, %409 %414 = fadd float %413, %25 %415 = fmul float %218, %410 %416 = fadd float %415, %26 %417 = fmul float %288, %70 %418 = fadd float %417, %412 %419 = fmul float %290, %71 %420 = fadd float %419, %414 %421 = fmul float %292, %72 %422 = fadd float %421, %416 %423 = fmul float %214, %73 %424 = fadd float %423, %418 %425 = fmul float %216, %74 %426 = fadd float %425, %420 %427 = fmul float %218, %75 %428 = fadd float %427, %422 %429 = fmul float %110, 3.906250e-03 %430 = call i32 @llvm.SI.packf16(float %424, float %426) %431 = bitcast i32 %430 to float %432 = call i32 @llvm.SI.packf16(float %428, float %429) %433 = bitcast i32 %432 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %431, float %433, float %431, float %433) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 s_load_dwordx4 s[40:43], s[4:5], 0x0 ; C0940500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900 s_buffer_load_dword s1, s[8:11], 0x1 ; C2008901 s_buffer_load_dword s0, s[8:11], 0x2 ; C2000902 s_buffer_load_dword s3, s[8:11], 0x10 ; C2018910 s_buffer_load_dword s12, s[8:11], 0x11 ; C2060911 s_buffer_load_dword s20, s[8:11], 0x12 ; C20A0912 s_buffer_load_dword s21, s[8:11], 0x13 ; C20A8913 s_buffer_load_dword s19, s[8:11], 0x14 ; C2098914 s_buffer_load_dword s16, s[8:11], 0x15 ; C2080915 s_buffer_load_dword s15, s[8:11], 0x16 ; C2078916 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s3 ; 7E100203 v_mov_b32_e32 v9, s12 ; 7E12020C s_buffer_load_dword s17, s[8:11], 0x17 ; C2088917 s_buffer_load_dword s23, s[8:11], 0x36 ; C20B8936 v_interp_p1_f32 v10, v0, 3, 1, [m0] ; C8280700 v_interp_p2_f32 v10, [v10], v1, 3, 1, [m0] ; C8290701 v_mov_b32_e32 v11, 0xb58637bd ; 7E1602FF B58637BD v_mov_b32_e32 v12, 0x7fffffff ; 7E1802FF 7FFFFFFF v_mov_b32_e32 v13, 0x3f170a3d ; 7E1A02FF 3F170A3D v_mov_b32_e32 v14, 0x3de147ae ; 7E1C02FF 3DE147AE v_sub_f32_e64 v15, 1.0, s2 ; D208000F 000004F2 s_buffer_load_dword s72, s[8:11], 0x38 ; C2240938 s_buffer_load_dword s73, s[8:11], 0x39 ; C2248939 s_buffer_load_dword s22, s[8:11], 0x3a ; C20B093A s_buffer_load_dword s18, s[8:11], 0x3b ; C209093B s_buffer_load_dword s74, s[8:11], 0x3c ; C225093C s_buffer_load_dword s14, s[8:11], 0x40 ; C2070940 s_buffer_load_dword s13, s[8:11], 0x41 ; C2068941 s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942 s_buffer_load_dword s12, s[8:11], 0x44 ; C2060944 s_buffer_load_dword s75, s[8:11], 0x34 ; C2258934 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v16, 1.0, s72 ; D2080010 000090F2 s_buffer_load_dword s76, s[8:11], 0x35 ; C2260935 v_sub_f32_e64 v17, 1.0, s22 ; D2080011 00002CF2 v_interp_p1_f32 v18, v0, 0, 2, [m0] ; C8480800 v_interp_p2_f32 v18, [v18], v1, 0, 2, [m0] ; C8490801 v_interp_p1_f32 v19, v0, 1, 2, [m0] ; C84C0900 v_interp_p2_f32 v19, [v19], v1, 1, 2, [m0] ; C84D0901 v_interp_p1_f32 v20, v0, 0, 3, [m0] ; C8500C00 v_interp_p2_f32 v20, [v20], v1, 0, 3, [m0] ; C8510C01 v_interp_p1_f32 v21, v0, 1, 3, [m0] ; C8540D00 v_interp_p2_f32 v21, [v21], v1, 1, 3, [m0] ; C8550D01 s_load_dwordx4 s[52:55], s[4:5], 0x4 ; C09A0504 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 v_interp_p1_f32 v22, v0, 2, 3, [m0] ; C8580E00 v_interp_p2_f32 v22, [v22], v1, 2, 3, [m0] ; C8590E01 v_interp_p1_f32 v23, v0, 3, 4, [m0] ; C85C1300 v_interp_p2_f32 v23, [v23], v1, 3, 4, [m0] ; C85D1301 v_interp_p1_f32 v24, v0, 0, 5, [m0] ; C8601400 v_interp_p2_f32 v24, [v24], v1, 0, 5, [m0] ; C8611401 v_interp_p1_f32 v25, v0, 1, 5, [m0] ; C8641500 v_interp_p2_f32 v25, [v25], v1, 1, 5, [m0] ; C8651501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_load_dwordx8 s[64:71], s[6:7], 0x0 ; C0E00700 s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx8 s[56:63], s[6:7], 0x8 ; C0DC0708 s_load_dwordx8 s[44:51], s[6:7], 0x10 ; C0D60710 s_load_dwordx8 s[28:35], s[6:7], 0x18 ; C0CE0718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[64:71], s[40:43] ; F0800700 01501A12 v_mul_f32_e32 v1, v24, v24 ; 10023118 v_mad_f32 v1, v25, v25, v1 ; D2820001 04063319 v_mad_f32 v1, v0, v0, v1 ; D2820001 04060100 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, 2.0, v26, -1.0 ; D282001A 03CE34F4 v_mad_f32 v27, 2.0, v27, -1.0 ; D282001B 03CE36F4 v_mad_f32 v28, 2.0, v28, -1.0 ; D282001C 03CE38F4 v_min_f32_e32 v1, 0x7f7fffff, v1 ; 1E0202FF 7F7FFFFF v_mul_f32_e32 v29, v1, v24 ; 103A3101 v_mul_f32_e32 v30, v1, v25 ; 103C3301 v_mul_f32_e32 v31, v29, v26 ; 103E351D v_mad_f32 v31, v27, v30, v31 ; D282001F 047E3D1B v_mul_f32_e32 v32, v1, v0 ; 10400101 v_mad_f32 v31, v28, v32, v31 ; D282001F 047E411C v_add_f32_e64 v31, 0, v31 clamp ; D206081F 00023E80 v_and_b32_e32 v33, v31, v12 ; 3642191F v_log_f32_e32 v33, v33 ; 7E424F21 image_sample v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[56:63], s[52:55] ; F0800700 01AE2212 v_mul_legacy_f32_e32 v33, s73, v33 ; 0E424249 v_add_f32_e32 v31, v11, v31 ; 063E3F0B v_exp_f32_e32 v33, v33 ; 7E424B21 v_cmp_gt_f32_e32 vcc, 0, v31 ; 7C083E80 v_cndmask_b32_e64 v31, v33, 0, vcc ; D200001F 01A90121 v_mul_f32_e32 v33, s75, v31 ; 10423E4B v_mul_f32_e32 v37, s76, v31 ; 104A3E4C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v38, 0x3e99999a, v34 ; 104C44FF 3E99999A v_mad_f32 v38, v35, v13, v38 ; D2820026 049A1B23 v_mad_f32 v38, v36, v14, v38 ; D2820026 049A1D24 v_mul_f32_e32 v39, v34, v16 ; 104E2122 v_mad_f32 v39, s72, v38, v39 ; D2820027 049E4C48 v_mul_f32_e32 v40, v35, v16 ; 10502123 v_mad_f32 v40, s72, v38, v40 ; D2820028 04A24C48 v_mul_f32_e32 v16, v36, v16 ; 10202124 v_mad_f32 v16, s72, v38, v16 ; D2820010 04424C48 v_mul_f32_e32 v38, s23, v31 ; 104C3E17 v_mad_f32 v33, v39, v33, -v34 ; D2820021 848A4327 v_mad_f32 v37, v40, v37, -v35 ; D2820025 848E4B28 v_mad_f32 v16, v16, v38, -v36 ; D2820010 84924D10 image_sample v38, 2, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[44:51], s[36:39] ; F0800200 012B2612 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v31, v38, v31 ; 103E3F26 v_mad_f32 v33, v31, v33, v34 ; D2820021 048A431F v_mad_f32 v34, v31, v37, v35 ; D2820022 048E4B1F v_mad_f32 v16, v31, v16, v36 ; D2820010 0492211F v_mul_f32_e32 v31, v26, v26 ; 103E351A v_mad_f32 v31, v27, v27, v31 ; D282001F 047E371B v_mad_f32 v31, v28, v28, v31 ; D282001F 047E391C v_rsq_clamp_f32_e32 v31, v31 ; 7E3E591F image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[28:35], s[24:27] ; F0800700 00C72312 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, s74, v35 ; 1024464A v_mul_f32_e32 v19, s74, v36 ; 1026484A v_mul_f32_e32 v35, s74, v37 ; 10464A4A v_min_f32_e32 v31, 0x7f7fffff, v31 ; 1E3E3EFF 7F7FFFFF v_mul_f32_e32 v26, v31, v26 ; 1034351F v_mul_f32_e32 v27, v31, v27 ; 1036371F v_mul_f32_e32 v28, v31, v28 ; 1038391F v_mul_f32_e32 v29, v29, v26 ; 103A351D v_mad_f32 v29, v27, v30, v29 ; D282001D 04763D1B v_mad_f32 v29, v28, v32, v29 ; D282001D 0476411C v_mul_f32_e32 v30, v26, v29 ; 103C3B1A v_mad_f32 v30, v29, v26, v30 ; D282001E 047A351D v_mad_f32 v24, -v24, v1, v30 ; D2820018 247A0318 v_mul_f32_e32 v30, v27, v29 ; 103C3B1B v_mad_f32 v30, v29, v27, v30 ; D282001E 047A371D v_mad_f32 v25, -v25, v1, v30 ; D2820019 247A0319 v_mul_f32_e32 v30, v28, v29 ; 103C3B1C v_mad_f32 v29, v29, v28, v30 ; D282001D 047A391D v_mad_f32 v0, -v0, v1, v29 ; D2820000 24760300 v_mul_f32_e32 v1, 0x3e99999a, v33 ; 100242FF 3E99999A v_mad_f32 v1, v34, v13, v1 ; D2820001 04061B22 v_mad_f32 v1, v16, v14, v1 ; D2820001 04061D10 v_mul_f32_e32 v13, v33, v17 ; 101A2321 v_mul_f32_e32 v14, v34, v17 ; 101C2322 v_mul_f32_e32 v16, v16, v17 ; 10202310 v_mad_f32 v13, s22, v1, v13 ; D282000D 04360216 v_mad_f32 v14, s22, v1, v14 ; D282000E 043A0216 v_mad_f32 v1, s22, v1, v16 ; D2820001 04420216 v_mul_f32_e32 v13, s18, v13 ; 101A1A12 v_mul_f32_e32 v13, v15, v13 ; 101A1B0F v_mul_f32_e32 v14, s18, v14 ; 101C1C12 v_sub_f32_e64 v15, 1.0, s1 ; D208000F 000002F2 v_mul_f32_e32 v14, v15, v14 ; 101C1D0F v_mul_f32_e32 v1, s18, v1 ; 10020212 v_sub_f32_e64 v15, 1.0, s0 ; D208000F 000000F2 v_mul_f32_e32 v1, v15, v1 ; 1002030F v_mad_f32 v8, s21, v13, v8 ; D2820008 04221A15 v_mad_f32 v9, s21, v14, v9 ; D2820009 04261C15 v_mov_b32_e32 v13, s20 ; 7E1A0214 v_mad_f32 v1, s21, v1, v13 ; D2820001 04360215 v_mul_f32_e32 v13, s18, v18 ; 101A2412 s_buffer_load_dword s4, s[8:11], 0x18 ; C2020918 s_buffer_load_dword s5, s[8:11], 0x19 ; C2028919 s_buffer_load_dword s6, s[8:11], 0x1a ; C203091A s_buffer_load_dword s7, s[8:11], 0x1c ; C203891C v_mov_b32_e32 v14, s19 ; 7E1C0213 v_mad_f32 v13, s17, v13, v14 ; D282000D 043A1A11 v_mul_f32_e32 v14, s18, v19 ; 101C2612 v_mul_f32_e32 v15, v5, v5 ; 101E0B05 v_mad_f32 v15, v6, v6, v15 ; D282000F 043E0D06 v_mad_f32 v15, v7, v7, v15 ; D282000F 043E0F07 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mov_b32_e32 v16, s16 ; 7E200210 v_mad_f32 v14, s17, v14, v16 ; D282000E 04421C11 v_mul_f32_e32 v16, s18, v35 ; 10204612 v_mov_b32_e32 v17, s15 ; 7E22020F v_mad_f32 v16, s17, v16, v17 ; D2820010 04462011 v_min_f32_e32 v15, 0x7f7fffff, v15 ; 1E1E1EFF 7F7FFFFF v_mul_f32_e32 v17, v2, v2 ; 10220502 v_mad_f32 v17, v3, v3, v17 ; D2820011 04460703 v_mad_f32 v17, v4, v4, v17 ; D2820011 04460904 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_mul_f32_e32 v5, v15, v5 ; 100A0B0F v_mul_f32_e32 v6, v15, v6 ; 100C0D0F v_mul_f32_e32 v7, v15, v7 ; 100E0F0F v_min_f32_e32 v15, 0x7f7fffff, v17 ; 1E1E22FF 7F7FFFFF v_mul_f32_e32 v2, v15, v2 ; 1004050F v_mul_f32_e32 v3, v15, v3 ; 1006070F v_mul_f32_e32 v4, v15, v4 ; 1008090F v_mul_f32_e32 v15, v3, v7 ; 101E0F03 v_mad_f32 v15, v6, v4, -v15 ; D282000F 843E0906 v_mul_f32_e32 v17, v26, v5 ; 10220B1A v_mad_f32 v17, v6, v27, v17 ; D2820011 04463706 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v5, v3, -v6 ; D2820006 841A0705 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 v_mad_f32 v5, v7, v2, -v5 ; D2820005 84160507 v_mad_f32 v7, v7, v28, v17 ; D2820007 04463907 v_mul_f32_e32 v2, v26, v2 ; 1004051A v_mad_f32 v2, v3, v27, v2 ; D2820002 040A3703 v_mad_f32 v2, v4, v28, v2 ; D2820002 040A3904 v_mul_f32_e32 v3, v10, v15 ; 10061F0A v_mul_f32_e32 v4, v10, v5 ; 10080B0A v_mul_f32_e32 v5, v10, v6 ; 100A0D0A v_mul_f32_e32 v3, v26, v3 ; 1006071A v_mad_f32 v3, v4, v27, v3 ; D2820003 040E3704 v_mad_f32 v3, v5, v28, v3 ; D2820003 040E3905 v_mul_f32_e32 v4, v7, v7 ; 10080F07 v_mov_b32_e32 v5, 0x40400000 ; 7E0A02FF 40400000 v_mad_f32 v4, v5, v4, -1.0 ; D2820004 03CE0905 s_buffer_load_dword s15, s[8:11], 0x20 ; C2078920 s_buffer_load_dword s16, s[8:11], 0x21 ; C2080921 v_mul_f32_e32 v5, v7, v3 ; 100A0707 v_mov_b32_e32 v6, 0xbf5bab79 ; 7E0C02FF BF5BAB79 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v10, v7, v2 ; 10140507 v_mul_f32_e32 v6, v6, v10 ; 100C1506 s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929 s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928 v_mul_f32_e32 v4, 0x3e7da73e, v4 ; 100808FF 3E7DA73E s_buffer_load_dword s19, s[8:11], 0x30 ; C2098930 s_buffer_load_dword s20, s[8:11], 0x31 ; C20A0931 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s16, v4 ; 10140810 v_mad_f32 v10, s15, v5, v10 ; D282000A 042A0A0F s_buffer_load_dword s15, s[8:11], 0x22 ; C2078922 s_buffer_load_dword s16, s[8:11], 0x2a ; C208092A s_buffer_load_dword s21, s[8:11], 0x2b ; C20A892B v_mul_f32_e32 v15, s17, v4 ; 101E0811 v_mad_f32 v15, s18, v5, v15 ; D282000F 043E0A12 s_buffer_load_dword s17, s[8:11], 0x32 ; C2088932 s_buffer_load_dword s18, s[8:11], 0x33 ; C2090933 v_mul_f32_e32 v4, s20, v4 ; 10080814 v_mad_f32 v4, s19, v5, v4 ; D2820004 04120A13 s_buffer_load_dword s19, s[8:11], 0x23 ; C2098923 s_buffer_load_dword s20, s[8:11], 0x24 ; C20A0924 s_buffer_load_dword s22, s[8:11], 0x25 ; C20B0925 s_buffer_load_dword s23, s[8:11], 0x26 ; C20B8926 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s15, v6, v10 ; D2820005 042A0C0F v_mad_f32 v10, s16, v6, v15 ; D282000A 043E0C10 v_mad_f32 v4, s17, v6, v4 ; D2820004 04120C11 v_mul_f32_e32 v6, v3, v3 ; 100C0703 v_mad_f32 v6, v2, v2, -v6 ; D2820006 841A0502 s_buffer_load_dword s15, s[8:11], 0x1d ; C207891D v_mul_f32_e32 v6, 0x3edbab79, v6 ; 100C0CFF 3EDBAB79 v_mad_f32 v5, s19, v6, v5 ; D2820005 04160C13 v_mad_f32 v10, s21, v6, v10 ; D282000A 042A0C15 v_mad_f32 v4, s18, v6, v4 ; D2820004 04120C12 v_mul_f32_e32 v6, 0x3f82fc67, v7 ; 100C0EFF 3F82FC67 s_buffer_load_dword s16, s[8:11], 0x1e ; C208091E s_buffer_load_dword s17, s[8:11], 0x1f ; C208891F s_buffer_load_dword s18, s[8:11], 0x2d ; C209092D s_buffer_load_dword s19, s[8:11], 0x2c ; C209892C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s15, v6 ; 100E0C0F v_mov_b32_e32 v15, 0xbf82fc67 ; 7E1E02FF BF82FC67 v_mul_f32_e32 v17, v15, v3 ; 1022070F v_mad_f32 v7, s7, v17, v7 ; D2820007 041E2207 s_buffer_load_dword s7, s[8:11], 0x2e ; C203892E v_mul_f32_e32 v18, s22, v6 ; 10240C16 v_mad_f32 v18, s20, v17, v18 ; D2820012 044A2214 s_buffer_load_dword s15, s[8:11], 0x2f ; C207892F v_mul_f32_e32 v6, s18, v6 ; 100C0C12 v_mad_f32 v6, s19, v17, v6 ; D2820006 041A2213 v_mul_f32_e32 v15, v15, v2 ; 101E050F v_mad_f32 v7, s16, v15, v7 ; D2820007 041E1E10 s_buffer_load_dword s16, s[8:11], 0x27 ; C2080927 v_mad_f32 v17, s23, v15, v18 ; D2820011 044A1E17 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, s7, v15, v6 ; D2820006 041A1E07 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v3, v20, v20 ; 10062914 v_mad_f32 v3, v21, v21, v3 ; D2820003 040E2B15 v_mad_f32 v3, v22, v22, v3 ; D2820003 040E2D16 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mul_f32_e32 v2, 0x3f5bab79, v2 ; 100404FF 3F5BAB79 v_mad_f32 v7, s17, v2, v7 ; D2820007 041E0411 v_mad_f32 v15, s16, v2, v17 ; D282000F 04460410 v_mad_f32 v2, s15, v2, v6 ; D2820002 041A040F v_min_f32_e32 v3, 0x7f7fffff, v3 ; 1E0606FF 7F7FFFFF v_mul_f32_e32 v6, v3, v20 ; 100C2903 v_mul_f32_e32 v17, v3, v21 ; 10222B03 v_mul_f32_e32 v3, v3, v22 ; 10062D03 v_mul_f32_e32 v18, v6, v26 ; 10243506 v_mad_f32 v18, v27, v17, v18 ; D2820012 044A231B v_mad_f32 v18, v28, v3, v18 ; D2820012 044A071C v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_add_f32_e32 v19, v11, v18 ; 0626250B v_cmp_gt_f32_e32 vcc, 0, v19 ; 7C082680 v_mov_b32_e32 v19, 0x3f62dfd0 ; 7E2602FF 3F62DFD0 v_mad_f32 v7, s4, v19, v7 ; D2820007 041E2604 v_mad_f32 v2, s6, v19, v2 ; D2820002 040A2606 v_mad_f32 v15, s5, v19, v15 ; D282000F 043E2605 s_buffer_load_dword s4, s[8:11], 0x45 ; C2020945 s_buffer_load_dword s5, s[8:11], 0x46 ; C2028946 v_cndmask_b32_e64 v18, v18, 0, vcc ; D2000012 01A90112 v_mul_f32_e32 v6, v6, v24 ; 100C3106 v_mad_f32 v6, v25, v17, v6 ; D2820006 041A2319 v_mad_f32 v0, v0, v3, v6 ; D2820000 041A0700 v_add_f32_e32 v3, v7, v5 ; 06060B07 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v5, v0, v12 ; 360A1900 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_max_f32_e32 v3, 0, v3 ; 20060680 v_mad_f32 v3, v8, v3, s2 ; D2820003 000A0708 v_add_f32_e32 v0, v11, v0 ; 0600010B v_mul_legacy_f32_e32 v5, 0x41700000, v5 ; 0E0A0AFF 41700000 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mul_f32_e32 v5, 0x3f6a46ad, v5 ; 100A0AFF 3F6A46AD v_mul_f32_e32 v6, v5, v13 ; 100C1B05 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v6, 0, vcc ; D2000000 01A90106 v_mad_f32 v0, v8, v18, v0 ; D2820000 04022508 v_mad_f32 v0, v0, s14, v3 ; D2820000 040C1D00 v_mul_f32_e32 v3, v5, v14 ; 10061D05 v_mul_f32_e32 v5, v5, v16 ; 100A2105 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_cndmask_b32_e64 v5, v5, 0, vcc ; D2000005 01A90105 v_mad_f32 v3, v9, v18, v3 ; D2820003 040E2509 v_mad_f32 v5, v1, v18, v5 ; D2820005 04162501 v_add_f32_e32 v2, v2, v4 ; 06040902 v_add_f32_e32 v4, v15, v10 ; 0608150F v_max_f32_e32 v4, 0, v4 ; 20080880 v_mad_f32 v4, v9, v4, s1 ; D2820004 00060909 v_mad_f32 v3, v3, s13, v4 ; D2820003 04101B03 v_max_f32_e32 v2, 0, v2 ; 20040480 v_mad_f32 v2, v1, v2, s0 ; D2820002 00020501 v_mad_f32 v2, v5, s3, v2 ; D2820002 04080705 v_mad_f32 v0, v8, s12, v0 ; D2820000 04001908 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v9, s4, v3 ; D2820003 040C0909 v_mad_f32 v1, v1, s5, v2 ; D2820001 04080B01 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 v_mul_f32_e32 v2, 0x3b800000, v23 ; 10042EFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 44 Code Size: 1932 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[4], PERSPECTIVE DCL IN[4], TEXCOORD[5], PERSPECTIVE DCL IN[5], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..16] DCL TEMP[0..1] DCL TEMP[2], LOCAL DCL TEMP[3..8] IMM[0] FLT32 { 2.0000, -1.0000, -2.0000, -0.0000} IMM[1] FLT32 { -1.0233, 1.0233, 0.8862, 8.0000} IMM[2] FLT32 { -0.8581, 0.2477, 0.4290, 0.0398} IMM[3] FLT32 { 0.0039, 0.0000, 340282346638528859811704183484516925440.0000, 3.0000} IMM[4] FLT32 { -0.0000, 3.0000, -1.0000, 0.8581} 0: TEX TEMP[0], IN[2], SAMP[0], 2D 1: MAD TEMP[1].xyz, TEMP[0], IMM[0].xxxx, IMM[0].yyyy 2: MAD TEMP[0].xyz, TEMP[0], IMM[0].xxxx, IMM[0].yyzw 3: DP3 TEMP[2].x, IN[5], IN[5] 4: RSQ TEMP[2].x, TEMP[2].xxxx 5: MIN TEMP[2].x, IMM[3].zzzz, TEMP[2].xxxx 6: MUL TEMP[3].xyz, IN[5], TEMP[2].xxxx 7: DP3_SAT TEMP[0].w, TEMP[3], TEMP[1] 8: MUL TEMP[1].x, TEMP[0].wwww, TEMP[0].wwww 9: MUL TEMP[1].x, TEMP[0].wwww, TEMP[1].xxxx 10: ADD TEMP[0].w, TEMP[0].wwww, IMM[4].xxxx 11: TEX TEMP[4], IN[2], SAMP[1], 2D 12: MAD TEMP[0].xyz, TEMP[4].xxxx, TEMP[0], -IMM[0].wwyw 13: DP3 TEMP[2].x, TEMP[0], TEMP[0] 14: RSQ TEMP[2].x, TEMP[2].xxxx 15: MIN TEMP[2].x, IMM[3].zzzz, TEMP[2].xxxx 16: MUL TEMP[5].xyz, TEMP[0], TEMP[2].xxxx 17: DP3 TEMP[0].x, TEMP[5], TEMP[3] 18: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[5] 19: MAD TEMP[0].xyz, TEMP[0], IMM[0].xxxx, -TEMP[3] 20: TEX TEMP[3], TEMP[0], SAMP[3], CUBE 21: MUL TEMP[1].yzw, TEMP[3].xxyz, TEMP[4].xxxx 22: MUL TEMP[1].yzw, TEMP[1], CONST[13].xxxx 23: MUL TEMP[1].xyz, TEMP[1].yzww, TEMP[1].xxxx 24: MUL TEMP[1].xyz, TEMP[1], TEMP[4].yyyy 25: MUL TEMP[1].w, TEMP[4].yyyy, CONST[13].wwww 26: MAD TEMP[3].xyz, TEMP[1].wwww, CONST[5].wwww, CONST[5] 27: CMP TEMP[1].xyz, TEMP[0].wwww, -IMM[0].wwww, TEMP[1] 28: TEX TEMP[4], IN[2], SAMP[2], 2D 29: MAD TEMP[1].xyz, CONST[13].yyyy, TEMP[4], TEMP[1] 30: MUL TEMP[1].xyz, TEMP[1], CONST[13].zzzz 31: MOV TEMP[4].y, IMM[0].yyyy 32: ADD TEMP[4].xyz, -TEMP[4].yyyy, -CONST[0] 33: MUL TEMP[1].xyz, TEMP[1], TEMP[4] 34: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 35: DP3 TEMP[2].x, IN[1], IN[1] 36: RSQ TEMP[2].x, TEMP[2].xxxx 37: MIN TEMP[2].x, IMM[3].zzzz, TEMP[2].xxxx 38: MUL TEMP[4].xyz, IN[1], TEMP[2].xxxx 39: DP3 TEMP[2].x, IN[0], IN[0] 40: RSQ TEMP[2].x, TEMP[2].xxxx 41: MIN TEMP[2].x, IMM[3].zzzz, TEMP[2].xxxx 42: MUL TEMP[6].xyz, IN[0], TEMP[2].xxxx 43: MUL TEMP[7].xyz, TEMP[4].zxyw, TEMP[6].yzxw 44: MAD TEMP[7].xyz, TEMP[4].yzxw, TEMP[6].zxyw, -TEMP[7] 45: DP3 TEMP[4].y, TEMP[4], TEMP[5] 46: DP3 TEMP[4].z, TEMP[6], TEMP[5] 47: MUL TEMP[6].xyz, TEMP[7], IN[1].wwww 48: DP3 TEMP[4].x, TEMP[6], TEMP[5] 49: MUL TEMP[6].xz, TEMP[4], TEMP[4].yyyy 50: MUL TEMP[7].xyz, TEMP[4], TEMP[4].xyxw 51: MUL TEMP[8].xyz, TEMP[4], IMM[1].xyxw 52: MAD TEMP[6].w, TEMP[4].zzzz, TEMP[4].zzzz, -TEMP[7].xxxx 53: MAD TEMP[6].y, TEMP[7].yyyy, IMM[4].yyyy, IMM[4].zzzz 54: MUL TEMP[8].w, TEMP[7].zzzz, IMM[4].wwww 55: MUL TEMP[4], TEMP[6], IMM[2].xyxz 56: DP4 TEMP[0].w, CONST[8], TEMP[4] 57: DP4 TEMP[1].w, CONST[7], TEMP[8] 58: MOV TEMP[6].zw, IMM[1] 59: MAD TEMP[1].w, CONST[6].xxxx, TEMP[6].zzzz, TEMP[1].wwww 60: ADD TEMP[7].x, TEMP[0].wwww, TEMP[1].wwww 61: DP4 TEMP[0].w, CONST[10], TEMP[4] 62: DP4 TEMP[1].w, CONST[12], TEMP[4] 63: DP4 TEMP[3].w, CONST[9], TEMP[8] 64: DP4 TEMP[4].x, CONST[11], TEMP[8] 65: MAD TEMP[4].x, CONST[6].zzzz, TEMP[6].zzzz, TEMP[4].xxxx 66: ADD TEMP[7].z, TEMP[1].wwww, TEMP[4].xxxx 67: MAD TEMP[1].w, CONST[6].yyyy, TEMP[6].zzzz, TEMP[3].wwww 68: ADD TEMP[7].y, TEMP[0].wwww, TEMP[1].wwww 69: MUL TEMP[4].xyz, TEMP[1], TEMP[7] 70: CMP TEMP[4].xyz, TEMP[7], -IMM[0].wwww, TEMP[4] 71: ADD TEMP[4].xyz, TEMP[4], CONST[0] 72: DP3 TEMP[2].x, IN[3], IN[3] 73: RSQ TEMP[2].x, TEMP[2].xxxx 74: MIN TEMP[2].x, IMM[3].zzzz, TEMP[2].xxxx 75: MUL TEMP[6].xyz, IN[3], TEMP[2].xxxx 76: DP3_SAT TEMP[0].x, TEMP[0], TEMP[6] 77: DP3_SAT TEMP[0].y, TEMP[5], TEMP[6] 78: POW TEMP[1].w, |TEMP[0].xxxx|, CONST[14].xxxx 79: ADD TEMP[0].x, TEMP[0].xxxx, IMM[4].xxxx 80: ADD TEMP[0].z, TEMP[6].wwww, CONST[14].xxxx 81: MUL TEMP[0].z, TEMP[0].zzzz, TEMP[1].wwww 82: MUL TEMP[0].z, TEMP[0].zzzz, IMM[2].wwww 83: MUL TEMP[3].xyz, TEMP[3], TEMP[0].zzzz 84: CMP TEMP[0].xzw, TEMP[0].xxxx, -IMM[0].wwww, TEMP[3].xyyz 85: MUL TEMP[3].xyz, TEMP[0].yyyy, TEMP[1] 86: ADD TEMP[0].y, TEMP[0].yyyy, IMM[4].xxxx 87: CMP TEMP[3].xyz, TEMP[0].yyyy, -IMM[0].wwww, TEMP[3] 88: ADD TEMP[0].xyz, TEMP[0].xzww, TEMP[3] 89: MAD TEMP[0].xyz, TEMP[0], CONST[15], TEMP[4] 90: MAD OUT[0].xyz, TEMP[1], CONST[16], TEMP[0] 91: MUL OUT[0].w, IMM[3].xxxx, IN[4].wwww 92: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 220) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %73 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, align 32, !tbaa !0 %75 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %78 = bitcast <8 x i32> addrspace(2)* %77 to <32 x i8> addrspace(2)* %79 = load <32 x i8>, <32 x i8> addrspace(2)* %78, align 32, !tbaa !0 %80 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %81 = bitcast <4 x i32> addrspace(2)* %80 to <16 x i8> addrspace(2)* %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %84 = bitcast <8 x i32> addrspace(2)* %83 to <32 x i8> addrspace(2)* %85 = load <32 x i8>, <32 x i8> addrspace(2)* %84, align 32, !tbaa !0 %86 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %87 = bitcast <4 x i32> addrspace(2)* %86 to <16 x i8> addrspace(2)* %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)* %91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0 %92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)* %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 %95 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %111 = bitcast float %102 to i32 %112 = bitcast float %103 to i32 %113 = insertelement <2 x i32> undef, i32 %111, i32 0 %114 = insertelement <2 x i32> %113, i32 %112, i32 1 %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %114, <32 x i8> %74, <16 x i8> %76, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = extractelement <4 x float> %115, i32 1 %118 = extractelement <4 x float> %115, i32 2 %119 = fmul float %116, 2.000000e+00 %120 = fadd float %119, -1.000000e+00 %121 = fmul float %117, 2.000000e+00 %122 = fadd float %121, -1.000000e+00 %123 = fmul float %118, 2.000000e+00 %124 = fadd float %123, -1.000000e+00 %125 = fmul float %116, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %117, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %118, 2.000000e+00 %130 = fadd float %129, -2.000000e+00 %131 = fmul float %108, %108 %132 = fmul float %109, %109 %133 = fadd float %132, %131 %134 = fmul float %110, %110 %135 = fadd float %133, %134 %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135) %137 = call float @llvm.minnum.f32(float %136, float 0x47EFFFFFE0000000) %138 = fmul float %108, %137 %139 = fmul float %109, %137 %140 = fmul float %110, %137 %141 = fmul float %138, %120 %142 = fmul float %139, %122 %143 = fadd float %142, %141 %144 = fmul float %140, %124 %145 = fadd float %143, %144 %146 = call float @llvm.AMDIL.clamp.(float %145, float 0.000000e+00, float 1.000000e+00) %147 = fmul float %146, %146 %148 = fmul float %146, %147 %149 = fadd float %146, 0xBEB0C6F7A0000000 %150 = bitcast float %102 to i32 %151 = bitcast float %103 to i32 %152 = insertelement <2 x i32> undef, i32 %150, i32 0 %153 = insertelement <2 x i32> %152, i32 %151, i32 1 %154 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %153, <32 x i8> %79, <16 x i8> %82, i32 2) %155 = extractelement <4 x float> %154, i32 0 %156 = extractelement <4 x float> %154, i32 1 %157 = fmul float %155, %126 %158 = fadd float %157, 0.000000e+00 %159 = fmul float %155, %128 %160 = fadd float %159, 0.000000e+00 %161 = fmul float %155, %130 %162 = fadd float %161, 1.000000e+00 %163 = fmul float %158, %158 %164 = fmul float %160, %160 %165 = fadd float %164, %163 %166 = fmul float %162, %162 %167 = fadd float %165, %166 %168 = call float @llvm.AMDGPU.rsq.clamped.f32(float %167) %169 = call float @llvm.minnum.f32(float %168, float 0x47EFFFFFE0000000) %170 = fmul float %158, %169 %171 = fmul float %160, %169 %172 = fmul float %162, %169 %173 = fmul float %170, %138 %174 = fmul float %171, %139 %175 = fadd float %174, %173 %176 = fmul float %172, %140 %177 = fadd float %175, %176 %178 = fmul float %177, %170 %179 = fmul float %177, %171 %180 = fmul float %177, %172 %181 = fmul float %178, 2.000000e+00 %182 = fsub float %181, %138 %183 = fmul float %179, 2.000000e+00 %184 = fsub float %183, %139 %185 = fmul float %180, 2.000000e+00 %186 = fsub float %185, %140 %187 = insertelement <4 x float> undef, float %182, i32 0 %188 = insertelement <4 x float> %187, float %184, i32 1 %189 = insertelement <4 x float> %188, float %186, i32 2 %190 = insertelement <4 x float> %189, float %149, i32 3 %191 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %190) %192 = extractelement <4 x float> %191, i32 0 %193 = extractelement <4 x float> %191, i32 1 %194 = extractelement <4 x float> %191, i32 2 %195 = extractelement <4 x float> %191, i32 3 %196 = call float @fabs(float %194) %197 = fdiv float 1.000000e+00, %196 %198 = fmul float %192, %197 %199 = fadd float %198, 1.500000e+00 %200 = fmul float %193, %197 %201 = fadd float %200, 1.500000e+00 %202 = bitcast float %201 to i32 %203 = bitcast float %199 to i32 %204 = bitcast float %195 to i32 %205 = insertelement <4 x i32> undef, i32 %202, i32 0 %206 = insertelement <4 x i32> %205, i32 %203, i32 1 %207 = insertelement <4 x i32> %206, i32 %204, i32 2 %208 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %207, <32 x i8> %91, <16 x i8> %94, i32 4) %209 = extractelement <4 x float> %208, i32 0 %210 = extractelement <4 x float> %208, i32 1 %211 = extractelement <4 x float> %208, i32 2 %212 = fmul float %209, %155 %213 = fmul float %210, %155 %214 = fmul float %211, %155 %215 = fmul float %212, %62 %216 = fmul float %213, %62 %217 = fmul float %214, %62 %218 = fmul float %215, %148 %219 = fmul float %216, %148 %220 = fmul float %217, %148 %221 = fmul float %218, %156 %222 = fmul float %219, %156 %223 = fmul float %220, %156 %224 = fmul float %156, %65 %225 = fmul float %224, %34 %226 = fadd float %225, %31 %227 = fmul float %224, %34 %228 = fadd float %227, %32 %229 = fmul float %224, %34 %230 = fadd float %229, %33 %231 = call float @llvm.AMDGPU.cndlt(float %149, float 0.000000e+00, float %221) %232 = call float @llvm.AMDGPU.cndlt(float %149, float 0.000000e+00, float %222) %233 = call float @llvm.AMDGPU.cndlt(float %149, float 0.000000e+00, float %223) %234 = bitcast float %102 to i32 %235 = bitcast float %103 to i32 %236 = insertelement <2 x i32> undef, i32 %234, i32 0 %237 = insertelement <2 x i32> %236, i32 %235, i32 1 %238 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %237, <32 x i8> %85, <16 x i8> %88, i32 2) %239 = extractelement <4 x float> %238, i32 0 %240 = extractelement <4 x float> %238, i32 1 %241 = extractelement <4 x float> %238, i32 2 %242 = fmul float %63, %239 %243 = fadd float %242, %231 %244 = fmul float %63, %240 %245 = fadd float %244, %232 %246 = fmul float %63, %241 %247 = fadd float %246, %233 %248 = fmul float %243, %64 %249 = fmul float %245, %64 %250 = fmul float %247, %64 %251 = fsub float 1.000000e+00, %24 %252 = fsub float 1.000000e+00, %25 %253 = fsub float 1.000000e+00, %26 %254 = fmul float %248, %251 %255 = fmul float %249, %252 %256 = fmul float %250, %253 %257 = fmul float %254, %30 %258 = fadd float %257, %27 %259 = fmul float %255, %30 %260 = fadd float %259, %28 %261 = fmul float %256, %30 %262 = fadd float %261, %29 %263 = fmul float %98, %98 %264 = fmul float %99, %99 %265 = fadd float %264, %263 %266 = fmul float %100, %100 %267 = fadd float %265, %266 %268 = call float @llvm.AMDGPU.rsq.clamped.f32(float %267) %269 = call float @llvm.minnum.f32(float %268, float 0x47EFFFFFE0000000) %270 = fmul float %98, %269 %271 = fmul float %99, %269 %272 = fmul float %100, %269 %273 = fmul float %95, %95 %274 = fmul float %96, %96 %275 = fadd float %274, %273 %276 = fmul float %97, %97 %277 = fadd float %275, %276 %278 = call float @llvm.AMDGPU.rsq.clamped.f32(float %277) %279 = call float @llvm.minnum.f32(float %278, float 0x47EFFFFFE0000000) %280 = fmul float %95, %279 %281 = fmul float %96, %279 %282 = fmul float %97, %279 %283 = fmul float %272, %281 %284 = fmul float %270, %282 %285 = fmul float %271, %280 %286 = fmul float %271, %282 %287 = fsub float %286, %283 %288 = fmul float %272, %280 %289 = fsub float %288, %284 %290 = fmul float %270, %281 %291 = fsub float %290, %285 %292 = fmul float %270, %170 %293 = fmul float %271, %171 %294 = fadd float %293, %292 %295 = fmul float %272, %172 %296 = fadd float %294, %295 %297 = fmul float %280, %170 %298 = fmul float %281, %171 %299 = fadd float %298, %297 %300 = fmul float %282, %172 %301 = fadd float %299, %300 %302 = fmul float %287, %101 %303 = fmul float %289, %101 %304 = fmul float %291, %101 %305 = fmul float %302, %170 %306 = fmul float %303, %171 %307 = fadd float %306, %305 %308 = fmul float %304, %172 %309 = fadd float %307, %308 %310 = fmul float %309, %296 %311 = fmul float %301, %296 %312 = fmul float %309, %309 %313 = fmul float %296, %296 %314 = fmul float %301, %309 %315 = fmul float %309, 0xBFF05F8CE0000000 %316 = fmul float %296, 0x3FF05F8CE0000000 %317 = fmul float %301, 0xBFF05F8CE0000000 %318 = fmul float %301, %301 %319 = fsub float %318, %312 %320 = fmul float %313, 3.000000e+00 %321 = fadd float %320, -1.000000e+00 %322 = fmul float %314, 0x3FEB756F20000000 %323 = fmul float %310, 0xBFEB756F20000000 %324 = fmul float %321, 0x3FCFB4E7C0000000 %325 = fmul float %311, 0xBFEB756F20000000 %326 = fmul float %319, 0x3FDB756F20000000 %327 = fmul float %42, %323 %328 = fmul float %43, %324 %329 = fadd float %327, %328 %330 = fmul float %44, %325 %331 = fadd float %329, %330 %332 = fmul float %45, %326 %333 = fadd float %331, %332 %334 = fmul float %38, %315 %335 = fmul float %39, %316 %336 = fadd float %334, %335 %337 = fmul float %40, %317 %338 = fadd float %336, %337 %339 = fmul float %41, %322 %340 = fadd float %338, %339 %341 = fmul float %35, 0x3FEC5BFA00000000 %342 = fadd float %341, %340 %343 = fadd float %333, %342 %344 = fmul float %50, %323 %345 = fmul float %51, %324 %346 = fadd float %344, %345 %347 = fmul float %52, %325 %348 = fadd float %346, %347 %349 = fmul float %53, %326 %350 = fadd float %348, %349 %351 = fmul float %58, %323 %352 = fmul float %59, %324 %353 = fadd float %351, %352 %354 = fmul float %60, %325 %355 = fadd float %353, %354 %356 = fmul float %61, %326 %357 = fadd float %355, %356 %358 = fmul float %46, %315 %359 = fmul float %47, %316 %360 = fadd float %358, %359 %361 = fmul float %48, %317 %362 = fadd float %360, %361 %363 = fmul float %49, %322 %364 = fadd float %362, %363 %365 = fmul float %54, %315 %366 = fmul float %55, %316 %367 = fadd float %365, %366 %368 = fmul float %56, %317 %369 = fadd float %367, %368 %370 = fmul float %57, %322 %371 = fadd float %369, %370 %372 = fmul float %37, 0x3FEC5BFA00000000 %373 = fadd float %372, %371 %374 = fadd float %357, %373 %375 = fmul float %36, 0x3FEC5BFA00000000 %376 = fadd float %375, %364 %377 = fadd float %350, %376 %378 = fmul float %258, %343 %379 = fmul float %260, %377 %380 = fmul float %262, %374 %381 = call float @llvm.AMDGPU.cndlt(float %343, float 0.000000e+00, float %378) %382 = call float @llvm.AMDGPU.cndlt(float %377, float 0.000000e+00, float %379) %383 = call float @llvm.AMDGPU.cndlt(float %374, float 0.000000e+00, float %380) %384 = fadd float %381, %24 %385 = fadd float %382, %25 %386 = fadd float %383, %26 %387 = fmul float %104, %104 %388 = fmul float %105, %105 %389 = fadd float %388, %387 %390 = fmul float %106, %106 %391 = fadd float %389, %390 %392 = call float @llvm.AMDGPU.rsq.clamped.f32(float %391) %393 = call float @llvm.minnum.f32(float %392, float 0x47EFFFFFE0000000) %394 = fmul float %104, %393 %395 = fmul float %105, %393 %396 = fmul float %106, %393 %397 = fmul float %182, %394 %398 = fmul float %184, %395 %399 = fadd float %398, %397 %400 = fmul float %186, %396 %401 = fadd float %399, %400 %402 = call float @llvm.AMDIL.clamp.(float %401, float 0.000000e+00, float 1.000000e+00) %403 = fmul float %170, %394 %404 = fmul float %171, %395 %405 = fadd float %404, %403 %406 = fmul float %172, %396 %407 = fadd float %405, %406 %408 = call float @llvm.AMDIL.clamp.(float %407, float 0.000000e+00, float 1.000000e+00) %409 = call float @fabs(float %402) %410 = call float @llvm.pow.f32(float %409, float %66) %411 = fadd float %402, 0xBEB0C6F7A0000000 %412 = fadd float %66, 8.000000e+00 %413 = fmul float %412, %410 %414 = fmul float %413, 0x3FA45F3060000000 %415 = fmul float %226, %414 %416 = fmul float %228, %414 %417 = fmul float %230, %414 %418 = call float @llvm.AMDGPU.cndlt(float %411, float 0.000000e+00, float %415) %419 = call float @llvm.AMDGPU.cndlt(float %411, float 0.000000e+00, float %416) %420 = call float @llvm.AMDGPU.cndlt(float %411, float 0.000000e+00, float %417) %421 = fmul float %408, %258 %422 = fmul float %408, %260 %423 = fmul float %408, %262 %424 = fadd float %408, 0xBEB0C6F7A0000000 %425 = call float @llvm.AMDGPU.cndlt(float %424, float 0.000000e+00, float %421) %426 = call float @llvm.AMDGPU.cndlt(float %424, float 0.000000e+00, float %422) %427 = call float @llvm.AMDGPU.cndlt(float %424, float 0.000000e+00, float %423) %428 = fadd float %418, %425 %429 = fadd float %419, %426 %430 = fadd float %420, %427 %431 = fmul float %428, %67 %432 = fadd float %431, %384 %433 = fmul float %429, %68 %434 = fadd float %433, %385 %435 = fmul float %430, %69 %436 = fadd float %435, %386 %437 = fmul float %258, %70 %438 = fadd float %437, %432 %439 = fmul float %260, %71 %440 = fadd float %439, %434 %441 = fmul float %262, %72 %442 = fadd float %441, %436 %443 = fmul float %107, 3.906250e-03 %444 = call i32 @llvm.SI.packf16(float %438, float %440) %445 = bitcast i32 %444 to float %446 = call i32 @llvm.SI.packf16(float %442, float %443) %447 = bitcast i32 %446 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %445, float %447, float %445, float %447) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[48:51], s[4:5], 0x0 ; C0980500 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], 0x0 ; C2000D00 s_buffer_load_dword s9, s[12:15], 0x1 ; C2048D01 s_buffer_load_dword s8, s[12:15], 0x2 ; C2040D02 s_buffer_load_dword s1, s[12:15], 0x10 ; C2008D10 s_buffer_load_dword s2, s[12:15], 0x11 ; C2010D11 s_buffer_load_dword s3, s[12:15], 0x12 ; C2018D12 s_buffer_load_dword s16, s[12:15], 0x13 ; C2080D13 s_buffer_load_dword s10, s[12:15], 0x14 ; C2050D14 s_buffer_load_dword s11, s[12:15], 0x15 ; C2058D15 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s1 ; 7E100201 s_buffer_load_dword s1, s[12:15], 0x16 ; C2008D16 v_mov_b32_e32 v9, s2 ; 7E120202 s_buffer_load_dword s17, s[12:15], 0x17 ; C2088D17 v_mov_b32_e32 v10, s3 ; 7E140203 s_buffer_load_dword s2, s[12:15], 0x18 ; C2010D18 v_interp_p1_f32 v11, v0, 3, 1, [m0] ; C82C0700 v_interp_p2_f32 v11, [v11], v1, 3, 1, [m0] ; C82D0701 v_mov_b32_e32 v12, s10 ; 7E18020A v_mov_b32_e32 v13, s11 ; 7E1A020B s_buffer_load_dword s3, s[12:15], 0x19 ; C2018D19 s_buffer_load_dword s10, s[12:15], 0x1a ; C2050D1A v_mov_b32_e32 v14, 0xb58637bd ; 7E1C02FF B58637BD v_mov_b32_e32 v15, 0x3fc00000 ; 7E1E02FF 3FC00000 v_sub_f32_e64 v16, 1.0, s0 ; D2080010 000000F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s1 ; 7E220201 s_buffer_load_dword s11, s[12:15], 0x1c ; C2058D1C v_sub_f32_e64 v18, 1.0, s9 ; D2080012 000012F2 v_sub_f32_e64 v19, 1.0, s8 ; D2080013 000010F2 v_mul_f32_e32 v20, v5, v5 ; 10280B05 v_mad_f32 v20, v6, v6, v20 ; D2820014 04520D06 v_mad_f32 v20, v7, v7, v20 ; D2820014 04520F07 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_mul_f32_e32 v21, v2, v2 ; 102A0502 v_mad_f32 v21, v3, v3, v21 ; D2820015 04560703 v_mad_f32 v21, v4, v4, v21 ; D2820015 04560904 s_buffer_load_dword s18, s[12:15], 0x36 ; C2090D36 s_buffer_load_dword s19, s[12:15], 0x37 ; C2098D37 s_buffer_load_dword s1, s[12:15], 0x38 ; C2008D38 s_buffer_load_dword s21, s[12:15], 0x34 ; C20A8D34 s_buffer_load_dword s20, s[12:15], 0x35 ; C20A0D35 s_load_dwordx4 s[52:55], s[4:5], 0x4 ; C09A0504 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C v_interp_p1_f32 v22, v0, 0, 2, [m0] ; C8580800 v_interp_p2_f32 v22, [v22], v1, 0, 2, [m0] ; C8590801 v_interp_p1_f32 v23, v0, 1, 2, [m0] ; C85C0900 v_interp_p2_f32 v23, [v23], v1, 1, 2, [m0] ; C85D0901 v_interp_p1_f32 v24, v0, 0, 3, [m0] ; C8600C00 v_interp_p2_f32 v24, [v24], v1, 0, 3, [m0] ; C8610C01 v_interp_p1_f32 v25, v0, 1, 3, [m0] ; C8640D00 v_interp_p2_f32 v25, [v25], v1, 1, 3, [m0] ; C8650D01 v_interp_p1_f32 v26, v0, 2, 3, [m0] ; C8680E00 v_interp_p2_f32 v26, [v26], v1, 2, 3, [m0] ; C8690E01 v_interp_p1_f32 v27, v0, 3, 4, [m0] ; C86C1300 v_interp_p2_f32 v27, [v27], v1, 3, 4, [m0] ; C86D1301 v_interp_p1_f32 v28, v0, 0, 5, [m0] ; C8701400 v_interp_p2_f32 v28, [v28], v1, 0, 5, [m0] ; C8711401 v_interp_p1_f32 v29, v0, 1, 5, [m0] ; C8741500 v_interp_p2_f32 v29, [v29], v1, 1, 5, [m0] ; C8751501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_load_dwordx8 s[64:71], s[6:7], 0x0 ; C0E00700 s_load_dwordx8 s[56:63], s[6:7], 0x8 ; C0DC0708 s_load_dwordx8 s[32:39], s[6:7], 0x10 ; C0D00710 s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[64:71], s[48:51] ; F0800700 01901E16 image_sample v[33:34], 3, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[56:63], s[52:55] ; F0800300 01AE2116 v_mul_f32_e32 v1, v28, v28 ; 1002391C v_mad_f32 v1, v29, v29, v1 ; D2820001 04063B1D v_mad_f32 v1, v0, v0, v1 ; D2820001 04060100 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v30, 2.0, v30, -1.0 ; D282001E 03CE3CF4 v_mad_f32 v31, 2.0, v31, -1.0 ; D282001F 03CE3EF4 v_mad_f32 v35, 2.0, v32, -1.0 ; D2820023 03CE40F4 v_mad_f32 v32, 2.0, v32, -2.0 ; D2820020 03D640F4 v_min_f32_e32 v1, 0x7f7fffff, v1 ; 1E0202FF 7F7FFFFF v_mul_f32_e32 v36, v1, v28 ; 10483901 v_mul_f32_e32 v37, v30, v36 ; 104A491E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v30, v33, v30, 0 ; D282001E 02023D21 v_mad_f32 v32, v33, v32, 1.0 ; D2820020 03CA4121 v_mad_f32 v38, v33, v31, 0 ; D2820026 02023F21 v_mul_f32_e32 v39, v30, v30 ; 104E3D1E v_mad_f32 v39, v38, v38, v39 ; D2820027 049E4D26 v_mad_f32 v39, v32, v32, v39 ; D2820027 049E4120 v_rsq_clamp_f32_e32 v39, v39 ; 7E4E5927 v_mul_f32_e32 v40, v1, v29 ; 10503B01 v_mad_f32 v31, v40, v31, v37 ; D282001F 04963F28 v_mul_f32_e32 v37, v1, v0 ; 104A0101 v_mad_f32 v31, v37, v35, v31 ; D282001F 047E4725 v_min_f32_e32 v35, 0x7f7fffff, v39 ; 1E464EFF 7F7FFFFF v_mul_f32_e32 v30, v35, v30 ; 103C3D23 v_mul_f32_e32 v38, v35, v38 ; 104C4D23 v_mul_f32_e32 v32, v35, v32 ; 10404123 v_mul_f32_e32 v35, v36, v30 ; 10463D24 v_mad_f32 v35, v38, v40, v35 ; D2820023 048E5126 v_mad_f32 v35, v32, v37, v35 ; D2820023 048E4B20 v_add_f32_e64 v31, 0, v31 clamp ; D206081F 00023E80 v_mul_f32_e32 v36, v30, v35 ; 1048471E v_mad_f32 v36, v35, v30, v36 ; D2820024 04923D23 v_add_f32_e32 v42, v14, v31 ; 06543F0E v_mad_f32 v39, -v28, v1, v36 ; D2820027 2492031C v_mul_f32_e32 v28, v38, v35 ; 10384726 v_mad_f32 v28, v35, v38, v28 ; D282001C 04724D23 v_mad_f32 v40, -v29, v1, v28 ; D2820028 2472031D v_mul_f32_e32 v28, v32, v35 ; 10384720 v_mad_f32 v28, v35, v32, v28 ; D282001C 04724123 v_mad_f32 v41, -v0, v1, v28 ; D2820029 24720300 v_cubeid_f32 v46, v39, v40, v41 ; D288002E 04A65127 v_cubema_f32 v45, v39, v40, v41 ; D28E002D 04A65127 v_cubesc_f32 v44, v39, v40, v41 ; D28A002C 04A65127 v_cubetc_f32 v43, v39, v40, v41 ; D28C002B 04A65127 v_rcp_f32_e64 v0, |v45| ; D3540100 0000012D v_mul_f32_e32 v1, v24, v24 ; 10023118 v_mad_f32 v1, v25, v25, v1 ; D2820001 04063319 v_mad_f32 v1, v26, v26, v1 ; D2820001 0406351A v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mad_f32 v45, v43, v0, v15 ; D282002D 043E012B v_mad_f32 v44, v44, v0, v15 ; D282002C 043E012C image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[40:47], s[28:31] ; F0800700 00EA232C v_min_f32_e32 v0, 0x7f7fffff, v1 ; 1E0002FF 7F7FFFFF v_mul_f32_e32 v1, v0, v24 ; 10023100 v_mul_f32_e32 v15, v0, v25 ; 101E3300 v_mul_f32_e32 v0, v0, v26 ; 10003500 v_mul_f32_e32 v24, v1, v39 ; 10304F01 v_mad_f32 v24, v40, v15, v24 ; D2820018 04621F28 v_cmp_gt_f32_e32 vcc, 0, v42 ; 7C085480 v_mad_f32 v24, v41, v0, v24 ; D2820018 04620129 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v25, v33, v35 ; 10324721 v_mul_f32_e32 v26, v33, v36 ; 10344921 v_mul_f32_e32 v28, v33, v37 ; 10384B21 v_mul_f32_e32 v29, v31, v31 ; 103A3F1F v_mul_f32_e32 v29, v29, v31 ; 103A3F1D v_mul_f32_e32 v25, s21, v25 ; 10323215 v_mul_f32_e32 v26, s21, v26 ; 10343415 v_mul_f32_e32 v28, s21, v28 ; 10383815 v_mul_f32_e32 v25, v29, v25 ; 1032331D v_mul_f32_e32 v26, v29, v26 ; 1034351D v_mul_f32_e32 v28, v29, v28 ; 1038391D v_mul_f32_e32 v25, v34, v25 ; 10323322 v_mul_f32_e32 v26, v34, v26 ; 10343522 v_mul_f32_e32 v28, v34, v28 ; 10383922 v_cndmask_b32_e64 v25, v25, 0, vcc ; D2000019 01A90119 v_cndmask_b32_e64 v26, v26, 0, vcc ; D200001A 01A9011A v_cndmask_b32_e64 v28, v28, 0, vcc ; D200001C 01A9011C image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[32:39], s[24:27] ; F0800700 00C82316 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, s20, v35, v25 ; D2820016 04664614 v_mad_f32 v23, s20, v36, v26 ; D2820017 046A4814 v_mad_f32 v25, s20, v37, v28 ; D2820019 04724A14 s_buffer_load_dword s4, s[12:15], 0x3c ; C2020D3C s_buffer_load_dword s5, s[12:15], 0x3d ; C2028D3D v_mul_f32_e32 v26, s19, v34 ; 10344413 v_mad_f32 v12, s17, v26, v12 ; D282000C 04323411 v_mad_f32 v13, s17, v26, v13 ; D282000D 04363411 v_mad_f32 v17, s17, v26, v17 ; D2820011 04463411 v_mul_f32_e32 v22, s18, v22 ; 102C2C12 v_mul_f32_e32 v16, v16, v22 ; 10202D10 v_mul_f32_e32 v22, s18, v23 ; 102C2E12 v_mul_f32_e32 v18, v18, v22 ; 10242D12 v_mul_f32_e32 v22, s18, v25 ; 102C3212 v_mul_f32_e32 v19, v19, v22 ; 10262D13 v_mad_f32 v8, s16, v16, v8 ; D2820008 04222010 v_mad_f32 v9, s16, v18, v9 ; D2820009 04262410 v_mad_f32 v10, s16, v19, v10 ; D282000A 042A2610 v_min_f32_e32 v16, 0x7f7fffff, v20 ; 1E2028FF 7F7FFFFF v_rsq_clamp_f32_e32 v18, v21 ; 7E245915 v_mul_f32_e32 v5, v16, v5 ; 100A0B10 v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v7, v16, v7 ; 100E0F10 v_min_f32_e32 v16, 0x7f7fffff, v18 ; 1E2024FF 7F7FFFFF v_mul_f32_e32 v2, v16, v2 ; 10040510 v_mul_f32_e32 v3, v16, v3 ; 10060710 v_mul_f32_e32 v4, v16, v4 ; 10080910 v_mul_f32_e32 v16, v3, v7 ; 10200F03 v_mad_f32 v16, v6, v4, -v16 ; D2820010 84420906 v_mul_f32_e32 v18, v30, v5 ; 10240B1E v_mad_f32 v18, v6, v38, v18 ; D2820012 044A4D06 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v5, v3, -v6 ; D2820006 841A0705 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 v_mad_f32 v5, v7, v2, -v5 ; D2820005 84160507 v_mad_f32 v7, v7, v32, v18 ; D2820007 044A4107 v_mul_f32_e32 v2, v30, v2 ; 1004051E v_mad_f32 v2, v3, v38, v2 ; D2820002 040A4D03 v_mad_f32 v2, v4, v32, v2 ; D2820002 040A4104 v_mul_f32_e32 v3, v11, v16 ; 1006210B v_mul_f32_e32 v4, v11, v5 ; 10080B0B v_mul_f32_e32 v5, v11, v6 ; 100A0D0B v_mul_f32_e32 v3, v30, v3 ; 1006071E v_mad_f32 v3, v4, v38, v3 ; D2820003 040E4D04 v_mad_f32 v3, v5, v32, v3 ; D2820003 040E4105 v_mul_f32_e32 v4, v7, v7 ; 10080F07 v_mov_b32_e32 v5, 0x40400000 ; 7E0A02FF 40400000 v_mad_f32 v4, v5, v4, -1.0 ; D2820004 03CE0905 v_mul_f32_e32 v5, v7, v3 ; 100A0707 v_mul_f32_e32 v6, v3, v3 ; 100C0703 v_mul_f32_e32 v11, v3, v2 ; 10160503 v_mov_b32_e32 v16, 0xbf82fc67 ; 7E2002FF BF82FC67 v_mul_f32_e32 v3, v16, v3 ; 10060710 v_mul_f32_e32 v16, v16, v2 ; 10200510 v_mad_f32 v6, v2, v2, -v6 ; D2820006 841A0502 s_buffer_load_dword s6, s[12:15], 0x21 ; C2030D21 s_buffer_load_dword s7, s[12:15], 0x20 ; C2038D20 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mov_b32_e32 v18, 0xbf5bab79 ; 7E2402FF BF5BAB79 v_mul_f32_e32 v5, v18, v5 ; 100A0B12 v_mul_f32_e32 v2, v18, v2 ; 10040512 s_buffer_load_dword s16, s[12:15], 0x29 ; C2080D29 s_buffer_load_dword s17, s[12:15], 0x28 ; C2088D28 v_mul_f32_e32 v4, 0x3e7da73e, v4 ; 100808FF 3E7DA73E s_buffer_load_dword s18, s[12:15], 0x31 ; C2090D31 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v18, s6, v4 ; 10240806 v_mad_f32 v18, s7, v5, v18 ; D2820012 044A0A07 s_buffer_load_dword s6, s[12:15], 0x30 ; C2030D30 s_buffer_load_dword s7, s[12:15], 0x22 ; C2038D22 s_buffer_load_dword s19, s[12:15], 0x2a ; C2098D2A s_buffer_load_dword s20, s[12:15], 0x2b ; C20A0D2B v_mul_f32_e32 v19, s16, v4 ; 10260810 v_mad_f32 v19, s17, v5, v19 ; D2820013 044E0A11 s_buffer_load_dword s16, s[12:15], 0x32 ; C2080D32 s_buffer_load_dword s17, s[12:15], 0x33 ; C2088D33 v_mul_f32_e32 v4, s18, v4 ; 10080812 s_buffer_load_dword s18, s[12:15], 0x23 ; C2090D23 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s6, v5, v4 ; D2820004 04120A06 s_buffer_load_dword s6, s[12:15], 0x24 ; C2030D24 s_buffer_load_dword s21, s[12:15], 0x25 ; C20A8D25 s_buffer_load_dword s22, s[12:15], 0x26 ; C20B0D26 v_mad_f32 v5, s7, v2, v18 ; D2820005 044A0407 v_mad_f32 v18, s19, v2, v19 ; D2820012 044E0413 v_mad_f32 v2, s16, v2, v4 ; D2820002 04120410 v_mul_f32_e32 v4, 0x3edbab79, v6 ; 10080CFF 3EDBAB79 s_buffer_load_dword s7, s[12:15], 0x1d ; C2038D1D v_mad_f32 v5, s18, v4, v5 ; D2820005 04160812 v_mad_f32 v6, s20, v4, v18 ; D2820006 044A0814 v_mad_f32 v2, s17, v4, v2 ; D2820002 040A0811 v_mul_f32_e32 v4, 0x3f82fc67, v7 ; 10080EFF 3F82FC67 s_buffer_load_dword s16, s[12:15], 0x2d ; C2080D2D s_buffer_load_dword s17, s[12:15], 0x2c ; C2088D2C s_buffer_load_dword s18, s[12:15], 0x1e ; C2090D1E s_buffer_load_dword s19, s[12:15], 0x1f ; C2098D1F s_buffer_load_dword s20, s[12:15], 0x2e ; C20A0D2E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s7, v4 ; 100E0807 v_mad_f32 v7, s11, v3, v7 ; D2820007 041E060B v_mul_f32_e32 v18, s21, v4 ; 10240815 v_mad_f32 v18, s6, v3, v18 ; D2820012 044A0606 s_buffer_load_dword s6, s[12:15], 0x2f ; C2030D2F v_mul_f32_e32 v4, s16, v4 ; 10080810 v_mad_f32 v3, s17, v3, v4 ; D2820003 04120611 s_buffer_load_dword s7, s[12:15], 0x27 ; C2038D27 v_mad_f32 v4, s18, v16, v7 ; D2820004 041E2012 v_mad_f32 v7, s22, v16, v18 ; D2820007 044A2016 v_mad_f32 v3, s20, v16, v3 ; D2820003 040E2014 s_buffer_load_dword s11, s[12:15], 0x3e ; C2058D3E s_buffer_load_dword s16, s[12:15], 0x40 ; C2080D40 s_buffer_load_dword s17, s[12:15], 0x41 ; C2088D41 s_buffer_load_dword s12, s[12:15], 0x42 ; C2060D42 v_mul_f32_e32 v11, 0x3f5bab79, v11 ; 101616FF 3F5BAB79 v_mad_f32 v4, s19, v11, v4 ; D2820004 04121613 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s7, v11, v7 ; D2820007 041E1607 v_mad_f32 v3, s6, v11, v3 ; D2820003 040E1606 v_mov_b32_e32 v11, 0x3f62dfd0 ; 7E1602FF 3F62DFD0 v_mad_f32 v4, s2, v11, v4 ; D2820004 04121602 v_mad_f32 v3, s10, v11, v3 ; D2820003 040E160A v_mad_f32 v7, s3, v11, v7 ; D2820007 041E1603 v_mul_f32_e32 v1, v1, v30 ; 10023D01 v_mad_f32 v1, v38, v15, v1 ; D2820001 04061F26 v_mad_f32 v0, v32, v0, v1 ; D2820000 04060120 v_add_f32_e32 v1, v4, v5 ; 06020B04 v_add_f32_e64 v4, 0, v24 clamp ; D2060804 00023080 v_and_b32_e32 v5, 0x7fffffff, v4 ; 360A08FF 7FFFFFFF v_log_f32_e32 v5, v5 ; 7E0A4F05 v_mul_f32_e32 v11, v1, v8 ; 10161101 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v11, 0, vcc ; D2000001 01A9010B v_add_f32_e32 v1, s0, v1 ; 06020200 v_mul_legacy_f32_e32 v5, s1, v5 ; 0E0A0A01 v_mov_b32_e32 v11, 0x41000000 ; 7E1602FF 41000000 v_add_f32_e32 v11, s1, v11 ; 06161601 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e32 v4, v14, v4 ; 0608090E v_mul_f32_e32 v5, 0x3d22f983, v5 ; 100A0AFF 3D22F983 v_mul_f32_e32 v11, v5, v12 ; 10161905 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v4, v11, 0, vcc ; D2000004 01A9010B v_add_f32_e32 v11, v14, v0 ; 0616010E v_cmp_gt_f32_e64 s[0:1], 0, v11 ; D0080000 00021680 v_mul_f32_e32 v11, v8, v0 ; 10160108 v_cndmask_b32_e64 v11, v11, 0, s[0:1] ; D200000B 0001010B v_add_f32_e32 v4, v11, v4 ; 0608090B v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 v_add_f32_e32 v4, v7, v6 ; 06080D07 v_mul_f32_e32 v6, v4, v9 ; 100C1304 v_cmp_gt_f32_e64 s[2:3], 0, v4 ; D0080002 00020880 v_cndmask_b32_e64 v4, v6, 0, s[2:3] ; D2000004 00090106 v_mul_f32_e32 v6, v5, v13 ; 100C1B05 v_mul_f32_e32 v5, v5, v17 ; 100A2305 v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_cndmask_b32_e64 v5, v5, 0, vcc ; D2000005 01A90105 v_mul_f32_e32 v7, v9, v0 ; 100E0109 v_mul_f32_e32 v0, v10, v0 ; 1000010A v_cndmask_b32_e64 v7, v7, 0, s[0:1] ; D2000007 00010107 v_cndmask_b32_e64 v0, v0, 0, s[0:1] ; D2000000 00010100 v_add_f32_e32 v2, v3, v2 ; 06040503 v_add_f32_e32 v3, v7, v6 ; 06060D07 v_add_f32_e32 v0, v0, v5 ; 06000B00 v_add_f32_e32 v4, s9, v4 ; 06080809 v_mad_f32 v3, v3, s5, v4 ; D2820003 04100B03 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_mul_f32_e32 v2, v2, v10 ; 10041502 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_add_f32_e32 v2, s8, v2 ; 06040408 v_mad_f32 v0, v0, s11, v2 ; D2820000 04081700 v_mad_f32 v1, v8, s16, v1 ; D2820001 04042108 v_mad_f32 v2, v9, s17, v3 ; D2820002 040C2309 v_mad_f32 v0, v10, s12, v0 ; D2820000 0400190A v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, 0x3b800000, v27 ; 100436FF 3B800000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 48 Code Size: 1912 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[4], PERSPECTIVE DCL IN[4], TEXCOORD[5], PERSPECTIVE DCL IN[5], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..17] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..6] IMM[0] FLT32 { 0.3000, 0.5900, 0.1100, -0.0000} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[2] FLT32 { 0.8862, 20.0000, -0.8000, 0.2000} IMM[3] FLT32 { 8.0000, 0.0398, 0.0039, 0.0000} IMM[4] FLT32 { 3.0000, -1.0000, -1.0233, 1.0233} IMM[5] FLT32 { 0.8581, -0.8581, 0.2477, 0.4290} IMM[6] FLT32 {340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[2], SAMP[2], 2D 1: MAD TEMP[0].xyz, TEMP[0], IMM[1].xxxx, IMM[1].yyyy 2: DP3 TEMP[1].x, IN[5], IN[5] 3: RSQ TEMP[1].x, TEMP[1].xxxx 4: MIN TEMP[1].x, IMM[6].xxxx, TEMP[1].xxxx 5: MUL TEMP[2].xyz, IN[5], TEMP[1].xxxx 6: DP3 TEMP[0].x, TEMP[0], TEMP[2] 7: MAX TEMP[2].w, TEMP[0].xxxx, IMM[1].wwww 8: MOV_SAT TEMP[0].x, TEMP[0].xxxx 9: ADD TEMP[0].y, TEMP[2].wwww, IMM[2].zzzz 10: ADD TEMP[0].z, -TEMP[2].wwww, IMM[1].zzzz 11: CMP TEMP[0].y, TEMP[0].yyyy, TEMP[0].zzzz, IMM[2].wwww 12: MUL TEMP[0].y, TEMP[0].yyyy, TEMP[0].yyyy 13: MUL TEMP[0].y, TEMP[0].yyyy, TEMP[0].yyyy 14: TEX TEMP[3], IN[2], SAMP[0], 2D 15: MAD TEMP[3].xyz, TEMP[3], IMM[1].xxxx, IMM[1].yyyy 16: DP3 TEMP[1].x, TEMP[3], TEMP[3] 17: RSQ TEMP[1].x, TEMP[1].xxxx 18: MIN TEMP[1].x, IMM[6].xxxx, TEMP[1].xxxx 19: MUL TEMP[4].xyz, TEMP[3], TEMP[1].xxxx 20: DP3 TEMP[0].z, TEMP[4], TEMP[2] 21: MUL TEMP[3].xyz, TEMP[0].zzzz, TEMP[4] 22: MAD TEMP[2].xyz, TEMP[3], IMM[1].xxxx, -TEMP[2] 23: DP3 TEMP[1].x, IN[3], IN[3] 24: RSQ TEMP[1].x, TEMP[1].xxxx 25: MIN TEMP[1].x, IMM[6].xxxx, TEMP[1].xxxx 26: MUL TEMP[3].xyz, IN[3], TEMP[1].xxxx 27: DP3_SAT TEMP[0].z, TEMP[2], TEMP[3] 28: DP3_SAT TEMP[0].w, TEMP[4], TEMP[3] 29: LG2 TEMP[1].x, |TEMP[0].zzzz| 30: MAX TEMP[2].x, IMM[6].yyyy, TEMP[1].xxxx 31: ADD TEMP[0].z, TEMP[0].zzzz, IMM[0].wwww 32: MUL TEMP[2].y, TEMP[2].xxxx, IMM[2].yyyy 33: MUL TEMP[2].x, TEMP[2].xxxx, CONST[15].yyyy 34: EX2 TEMP[2].x, TEMP[2].xxxx 35: EX2 TEMP[2].y, TEMP[2].yyyy 36: CMP TEMP[2].y, TEMP[0].zzzz, IMM[1].wwww, TEMP[2].yyyy 37: MUL TEMP[0].y, TEMP[0].yyyy, TEMP[2].yyyy 38: TEX TEMP[3], IN[2], SAMP[3], 2D 39: MUL TEMP[2].yzw, TEMP[0].yyyy, TEMP[3].xxyz 40: MUL TEMP[3].xyz, TEMP[3], CONST[15].xxxx 41: MAD TEMP[2].yzw, CONST[14].wwww, TEMP[2], TEMP[3].xxyz 42: MUL TEMP[2].yzw, TEMP[2], CONST[14].zzzz 43: MAD TEMP[2].yzw, TEMP[2], CONST[5].wwww, CONST[5].xxyz 44: MOV TEMP[0].y, CONST[15].yyyy 45: ADD TEMP[0].y, TEMP[0].yyyy, IMM[3].xxxx 46: MUL TEMP[0].y, TEMP[0].yyyy, TEMP[2].xxxx 47: MUL TEMP[0].y, TEMP[0].yyyy, IMM[3].yyyy 48: CMP TEMP[0].y, TEMP[0].zzzz, IMM[1].wwww, TEMP[0].yyyy 49: MUL TEMP[2].xyz, TEMP[2].yzww, TEMP[0].yyyy 50: ADD TEMP[0].y, TEMP[0].xxxx, IMM[0].wwww 51: POW TEMP[2].w, |TEMP[0].xxxx|, CONST[14].yyyy 52: CMP TEMP[0].x, TEMP[0].yyyy, IMM[1].wwww, TEMP[2].wwww 53: MUL TEMP[3].xyz, TEMP[0].xxxx, CONST[13] 54: TEX TEMP[5], IN[2], SAMP[1], 2D 55: DP3 TEMP[0].y, TEMP[5], IMM[0] 56: LRP TEMP[6].xyz, CONST[14].xxxx, TEMP[0].yyyy, TEMP[5] 57: MAD TEMP[3].xyz, TEMP[6], TEMP[3], -TEMP[5] 58: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[3], TEMP[5] 59: MUL TEMP[0].xyz, TEMP[0], CONST[14].zzzz 60: MOV TEMP[3].z, IMM[1].zzzz 61: ADD TEMP[3].xyz, TEMP[3].zzzz, -CONST[0] 62: MUL TEMP[0].xyz, TEMP[0], TEMP[3] 63: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 64: ADD TEMP[2].w, TEMP[0].wwww, IMM[0].wwww 65: CMP TEMP[0].w, TEMP[2].wwww, IMM[1].wwww, TEMP[0].wwww 66: MAD TEMP[2].xyz, TEMP[0], TEMP[0].wwww, TEMP[2] 67: DP3 TEMP[1].x, IN[1], IN[1] 68: RSQ TEMP[1].x, TEMP[1].xxxx 69: MIN TEMP[1].x, IMM[6].xxxx, TEMP[1].xxxx 70: MUL TEMP[3].xyz, IN[1], TEMP[1].xxxx 71: DP3 TEMP[1].x, IN[0], IN[0] 72: RSQ TEMP[1].x, TEMP[1].xxxx 73: MIN TEMP[1].x, IMM[6].xxxx, TEMP[1].xxxx 74: MUL TEMP[5].xyz, IN[0], TEMP[1].xxxx 75: MUL TEMP[6].xyz, TEMP[3].zxyw, TEMP[5].yzxw 76: MAD TEMP[6].xyz, TEMP[3].yzxw, TEMP[5].zxyw, -TEMP[6] 77: DP3 TEMP[3].y, TEMP[3], TEMP[4] 78: DP3 TEMP[3].z, TEMP[5], TEMP[4] 79: MUL TEMP[5].xyz, TEMP[6], IN[1].wwww 80: DP3 TEMP[3].x, TEMP[5], TEMP[4] 81: MUL TEMP[4].xz, TEMP[3], TEMP[3].yyyy 82: MUL TEMP[5].xyz, TEMP[3], TEMP[3].xyxw 83: MUL TEMP[6].xyz, TEMP[3], IMM[4].zwzw 84: MAD TEMP[4].w, TEMP[3].zzzz, TEMP[3].zzzz, -TEMP[5].xxxx 85: MAD TEMP[4].y, TEMP[5].yyyy, IMM[4].xxxx, IMM[4].yyyy 86: MUL TEMP[6].w, TEMP[5].zzzz, IMM[5].xxxx 87: MUL TEMP[3], TEMP[4], IMM[5].yzyw 88: DP4 TEMP[0].w, CONST[8], TEMP[3] 89: DP4 TEMP[2].w, CONST[7], TEMP[6] 90: MOV TEMP[4].x, IMM[2].xxxx 91: MAD TEMP[2].w, CONST[6].xxxx, TEMP[4].xxxx, TEMP[2].wwww 92: ADD TEMP[5].x, TEMP[0].wwww, TEMP[2].wwww 93: DP4 TEMP[0].w, CONST[10], TEMP[3] 94: DP4 TEMP[2].w, CONST[12], TEMP[3] 95: DP4 TEMP[3].x, CONST[9], TEMP[6] 96: DP4 TEMP[3].y, CONST[11], TEMP[6] 97: MAD TEMP[3].y, CONST[6].zzzz, TEMP[4].xxxx, TEMP[3].yyyy 98: ADD TEMP[5].z, TEMP[2].wwww, TEMP[3].yyyy 99: MAD TEMP[2].w, CONST[6].yyyy, TEMP[4].xxxx, TEMP[3].xxxx 100: ADD TEMP[5].y, TEMP[0].wwww, TEMP[2].wwww 101: MAX TEMP[3].xyz, TEMP[5], IMM[1].wwww 102: MAD TEMP[3].xyz, TEMP[0], TEMP[3], CONST[0] 103: MAD TEMP[2].xyz, TEMP[2], CONST[16], TEMP[3] 104: MAD OUT[0].xyz, TEMP[0], CONST[17], TEMP[2] 105: MUL OUT[0].w, IMM[3].zzzz, IN[4].wwww 106: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %77 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, align 32, !tbaa !0 %79 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %82 = bitcast <8 x i32> addrspace(2)* %81 to <32 x i8> addrspace(2)* %83 = load <32 x i8>, <32 x i8> addrspace(2)* %82, align 32, !tbaa !0 %84 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %85 = bitcast <4 x i32> addrspace(2)* %84 to <16 x i8> addrspace(2)* %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)* %89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)* %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)* %95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0 %96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)* %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %115 = bitcast float %106 to i32 %116 = bitcast float %107 to i32 %117 = insertelement <2 x i32> undef, i32 %115, i32 0 %118 = insertelement <2 x i32> %117, i32 %116, i32 1 %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %89, <16 x i8> %92, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = fmul float %120, 2.000000e+00 %124 = fadd float %123, -1.000000e+00 %125 = fmul float %121, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %122, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %112, %112 %130 = fmul float %113, %113 %131 = fadd float %130, %129 %132 = fmul float %114, %114 %133 = fadd float %131, %132 %134 = call float @llvm.AMDGPU.rsq.clamped.f32(float %133) %135 = call float @llvm.minnum.f32(float %134, float 0x47EFFFFFE0000000) %136 = fmul float %112, %135 %137 = fmul float %113, %135 %138 = fmul float %114, %135 %139 = fmul float %124, %136 %140 = fmul float %126, %137 %141 = fadd float %140, %139 %142 = fmul float %128, %138 %143 = fadd float %141, %142 %144 = call float @llvm.maxnum.f32(float %143, float 0.000000e+00) %145 = call float @llvm.AMDIL.clamp.(float %143, float 0.000000e+00, float 1.000000e+00) %146 = fadd float %144, 0xBFE99999A0000000 %147 = fsub float 1.000000e+00, %144 %148 = call float @llvm.AMDGPU.cndlt(float %146, float %147, float 0x3FC9999980000000) %149 = fmul float %148, %148 %150 = fmul float %149, %149 %151 = bitcast float %106 to i32 %152 = bitcast float %107 to i32 %153 = insertelement <2 x i32> undef, i32 %151, i32 0 %154 = insertelement <2 x i32> %153, i32 %152, i32 1 %155 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %154, <32 x i8> %78, <16 x i8> %80, i32 2) %156 = extractelement <4 x float> %155, i32 0 %157 = extractelement <4 x float> %155, i32 1 %158 = extractelement <4 x float> %155, i32 2 %159 = fmul float %156, 2.000000e+00 %160 = fadd float %159, -1.000000e+00 %161 = fmul float %157, 2.000000e+00 %162 = fadd float %161, -1.000000e+00 %163 = fmul float %158, 2.000000e+00 %164 = fadd float %163, -1.000000e+00 %165 = fmul float %160, %160 %166 = fmul float %162, %162 %167 = fadd float %166, %165 %168 = fmul float %164, %164 %169 = fadd float %167, %168 %170 = call float @llvm.AMDGPU.rsq.clamped.f32(float %169) %171 = call float @llvm.minnum.f32(float %170, float 0x47EFFFFFE0000000) %172 = fmul float %160, %171 %173 = fmul float %162, %171 %174 = fmul float %164, %171 %175 = fmul float %172, %136 %176 = fmul float %173, %137 %177 = fadd float %176, %175 %178 = fmul float %174, %138 %179 = fadd float %177, %178 %180 = fmul float %179, %172 %181 = fmul float %179, %173 %182 = fmul float %179, %174 %183 = fmul float %180, 2.000000e+00 %184 = fsub float %183, %136 %185 = fmul float %181, 2.000000e+00 %186 = fsub float %185, %137 %187 = fmul float %182, 2.000000e+00 %188 = fsub float %187, %138 %189 = fmul float %108, %108 %190 = fmul float %109, %109 %191 = fadd float %190, %189 %192 = fmul float %110, %110 %193 = fadd float %191, %192 %194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193) %195 = call float @llvm.minnum.f32(float %194, float 0x47EFFFFFE0000000) %196 = fmul float %108, %195 %197 = fmul float %109, %195 %198 = fmul float %110, %195 %199 = fmul float %184, %196 %200 = fmul float %186, %197 %201 = fadd float %200, %199 %202 = fmul float %188, %198 %203 = fadd float %201, %202 %204 = call float @llvm.AMDIL.clamp.(float %203, float 0.000000e+00, float 1.000000e+00) %205 = fmul float %172, %196 %206 = fmul float %173, %197 %207 = fadd float %206, %205 %208 = fmul float %174, %198 %209 = fadd float %207, %208 %210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00) %211 = call float @fabs(float %204) %212 = call float @llvm.log2.f32(float %211) %213 = call float @llvm.maxnum.f32(float %212, float 0xC7EFFFFFE0000000) %214 = fadd float %204, 0xBEB0C6F7A0000000 %215 = fmul float %213, 2.000000e+01 %216 = fmul float %213, %70 %217 = call float @llvm.AMDIL.exp.(float %216) %218 = call float @llvm.AMDIL.exp.(float %215) %219 = call float @llvm.AMDGPU.cndlt(float %214, float 0.000000e+00, float %218) %220 = fmul float %150, %219 %221 = bitcast float %106 to i32 %222 = bitcast float %107 to i32 %223 = insertelement <2 x i32> undef, i32 %221, i32 0 %224 = insertelement <2 x i32> %223, i32 %222, i32 1 %225 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %224, <32 x i8> %95, <16 x i8> %98, i32 2) %226 = extractelement <4 x float> %225, i32 0 %227 = extractelement <4 x float> %225, i32 1 %228 = extractelement <4 x float> %225, i32 2 %229 = fmul float %220, %226 %230 = fmul float %220, %227 %231 = fmul float %220, %228 %232 = fmul float %226, %69 %233 = fmul float %227, %69 %234 = fmul float %228, %69 %235 = fmul float %68, %229 %236 = fadd float %235, %232 %237 = fmul float %68, %230 %238 = fadd float %237, %233 %239 = fmul float %68, %231 %240 = fadd float %239, %234 %241 = fmul float %236, %67 %242 = fmul float %238, %67 %243 = fmul float %240, %67 %244 = fmul float %241, %34 %245 = fadd float %244, %31 %246 = fmul float %242, %34 %247 = fadd float %246, %32 %248 = fmul float %243, %34 %249 = fadd float %248, %33 %250 = fadd float %70, 8.000000e+00 %251 = fmul float %250, %217 %252 = fmul float %251, 0x3FA45F3060000000 %253 = call float @llvm.AMDGPU.cndlt(float %214, float 0.000000e+00, float %252) %254 = fmul float %245, %253 %255 = fmul float %247, %253 %256 = fmul float %249, %253 %257 = fadd float %145, 0xBEB0C6F7A0000000 %258 = call float @fabs(float %145) %259 = call float @llvm.pow.f32(float %258, float %66) %260 = call float @llvm.AMDGPU.cndlt(float %257, float 0.000000e+00, float %259) %261 = fmul float %260, %62 %262 = fmul float %260, %63 %263 = fmul float %260, %64 %264 = bitcast float %106 to i32 %265 = bitcast float %107 to i32 %266 = insertelement <2 x i32> undef, i32 %264, i32 0 %267 = insertelement <2 x i32> %266, i32 %265, i32 1 %268 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %267, <32 x i8> %83, <16 x i8> %86, i32 2) %269 = extractelement <4 x float> %268, i32 0 %270 = extractelement <4 x float> %268, i32 1 %271 = extractelement <4 x float> %268, i32 2 %272 = fmul float %269, 0x3FD3333340000000 %273 = fmul float %270, 0x3FE2E147A0000000 %274 = fadd float %273, %272 %275 = fmul float %271, 0x3FBC28F5C0000000 %276 = fadd float %274, %275 %277 = call float @llvm.AMDGPU.lrp(float %65, float %276, float %269) %278 = call float @llvm.AMDGPU.lrp(float %65, float %276, float %270) %279 = call float @llvm.AMDGPU.lrp(float %65, float %276, float %271) %280 = fmul float %277, %261 %281 = fsub float %280, %269 %282 = fmul float %278, %262 %283 = fsub float %282, %270 %284 = fmul float %279, %263 %285 = fsub float %284, %271 %286 = fmul float %260, %281 %287 = fadd float %286, %269 %288 = fmul float %260, %283 %289 = fadd float %288, %270 %290 = fmul float %260, %285 %291 = fadd float %290, %271 %292 = fmul float %287, %67 %293 = fmul float %289, %67 %294 = fmul float %291, %67 %295 = fsub float 1.000000e+00, %24 %296 = fsub float 1.000000e+00, %25 %297 = fsub float 1.000000e+00, %26 %298 = fmul float %292, %295 %299 = fmul float %293, %296 %300 = fmul float %294, %297 %301 = fmul float %298, %30 %302 = fadd float %301, %27 %303 = fmul float %299, %30 %304 = fadd float %303, %28 %305 = fmul float %300, %30 %306 = fadd float %305, %29 %307 = fadd float %210, 0xBEB0C6F7A0000000 %308 = call float @llvm.AMDGPU.cndlt(float %307, float 0.000000e+00, float %210) %309 = fmul float %302, %308 %310 = fadd float %309, %254 %311 = fmul float %304, %308 %312 = fadd float %311, %255 %313 = fmul float %306, %308 %314 = fadd float %313, %256 %315 = fmul float %102, %102 %316 = fmul float %103, %103 %317 = fadd float %316, %315 %318 = fmul float %104, %104 %319 = fadd float %317, %318 %320 = call float @llvm.AMDGPU.rsq.clamped.f32(float %319) %321 = call float @llvm.minnum.f32(float %320, float 0x47EFFFFFE0000000) %322 = fmul float %102, %321 %323 = fmul float %103, %321 %324 = fmul float %104, %321 %325 = fmul float %99, %99 %326 = fmul float %100, %100 %327 = fadd float %326, %325 %328 = fmul float %101, %101 %329 = fadd float %327, %328 %330 = call float @llvm.AMDGPU.rsq.clamped.f32(float %329) %331 = call float @llvm.minnum.f32(float %330, float 0x47EFFFFFE0000000) %332 = fmul float %99, %331 %333 = fmul float %100, %331 %334 = fmul float %101, %331 %335 = fmul float %324, %333 %336 = fmul float %322, %334 %337 = fmul float %323, %332 %338 = fmul float %323, %334 %339 = fsub float %338, %335 %340 = fmul float %324, %332 %341 = fsub float %340, %336 %342 = fmul float %322, %333 %343 = fsub float %342, %337 %344 = fmul float %322, %172 %345 = fmul float %323, %173 %346 = fadd float %345, %344 %347 = fmul float %324, %174 %348 = fadd float %346, %347 %349 = fmul float %332, %172 %350 = fmul float %333, %173 %351 = fadd float %350, %349 %352 = fmul float %334, %174 %353 = fadd float %351, %352 %354 = fmul float %339, %105 %355 = fmul float %341, %105 %356 = fmul float %343, %105 %357 = fmul float %354, %172 %358 = fmul float %355, %173 %359 = fadd float %358, %357 %360 = fmul float %356, %174 %361 = fadd float %359, %360 %362 = fmul float %361, %348 %363 = fmul float %353, %348 %364 = fmul float %361, %361 %365 = fmul float %348, %348 %366 = fmul float %353, %361 %367 = fmul float %361, 0xBFF05F8CE0000000 %368 = fmul float %348, 0x3FF05F8CE0000000 %369 = fmul float %353, 0xBFF05F8CE0000000 %370 = fmul float %353, %353 %371 = fsub float %370, %364 %372 = fmul float %365, 3.000000e+00 %373 = fadd float %372, -1.000000e+00 %374 = fmul float %366, 0x3FEB756F20000000 %375 = fmul float %362, 0xBFEB756F20000000 %376 = fmul float %373, 0x3FCFB4E7C0000000 %377 = fmul float %363, 0xBFEB756F20000000 %378 = fmul float %371, 0x3FDB756F20000000 %379 = fmul float %42, %375 %380 = fmul float %43, %376 %381 = fadd float %379, %380 %382 = fmul float %44, %377 %383 = fadd float %381, %382 %384 = fmul float %45, %378 %385 = fadd float %383, %384 %386 = fmul float %38, %367 %387 = fmul float %39, %368 %388 = fadd float %386, %387 %389 = fmul float %40, %369 %390 = fadd float %388, %389 %391 = fmul float %41, %374 %392 = fadd float %390, %391 %393 = fmul float %35, 0x3FEC5BFA00000000 %394 = fadd float %393, %392 %395 = fadd float %385, %394 %396 = fmul float %50, %375 %397 = fmul float %51, %376 %398 = fadd float %396, %397 %399 = fmul float %52, %377 %400 = fadd float %398, %399 %401 = fmul float %53, %378 %402 = fadd float %400, %401 %403 = fmul float %58, %375 %404 = fmul float %59, %376 %405 = fadd float %403, %404 %406 = fmul float %60, %377 %407 = fadd float %405, %406 %408 = fmul float %61, %378 %409 = fadd float %407, %408 %410 = fmul float %46, %367 %411 = fmul float %47, %368 %412 = fadd float %410, %411 %413 = fmul float %48, %369 %414 = fadd float %412, %413 %415 = fmul float %49, %374 %416 = fadd float %414, %415 %417 = fmul float %54, %367 %418 = fmul float %55, %368 %419 = fadd float %417, %418 %420 = fmul float %56, %369 %421 = fadd float %419, %420 %422 = fmul float %57, %374 %423 = fadd float %421, %422 %424 = fmul float %37, 0x3FEC5BFA00000000 %425 = fadd float %424, %423 %426 = fadd float %409, %425 %427 = fmul float %36, 0x3FEC5BFA00000000 %428 = fadd float %427, %416 %429 = fadd float %402, %428 %430 = call float @llvm.maxnum.f32(float %395, float 0.000000e+00) %431 = call float @llvm.maxnum.f32(float %429, float 0.000000e+00) %432 = call float @llvm.maxnum.f32(float %426, float 0.000000e+00) %433 = fmul float %302, %430 %434 = fadd float %433, %24 %435 = fmul float %304, %431 %436 = fadd float %435, %25 %437 = fmul float %306, %432 %438 = fadd float %437, %26 %439 = fmul float %310, %71 %440 = fadd float %439, %434 %441 = fmul float %312, %72 %442 = fadd float %441, %436 %443 = fmul float %314, %73 %444 = fadd float %443, %438 %445 = fmul float %302, %74 %446 = fadd float %445, %440 %447 = fmul float %304, %75 %448 = fadd float %447, %442 %449 = fmul float %306, %76 %450 = fadd float %449, %444 %451 = fmul float %111, 3.906250e-03 %452 = call i32 @llvm.SI.packf16(float %446, float %448) %453 = bitcast i32 %452 to float %454 = call i32 @llvm.SI.packf16(float %450, float %451) %455 = bitcast i32 %454 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %453, float %455, float %453, float %455) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900 s_buffer_load_dword s1, s[8:11], 0x1 ; C2008901 s_buffer_load_dword s0, s[8:11], 0x2 ; C2000902 s_buffer_load_dword s3, s[8:11], 0x10 ; C2018910 s_buffer_load_dword s12, s[8:11], 0x11 ; C2060911 s_buffer_load_dword s13, s[8:11], 0x12 ; C2068912 s_buffer_load_dword s19, s[8:11], 0x13 ; C2098913 s_buffer_load_dword s17, s[8:11], 0x14 ; C2088914 s_buffer_load_dword s21, s[8:11], 0x15 ; C20A8915 s_buffer_load_dword s22, s[8:11], 0x16 ; C20B0916 s_buffer_load_dword s20, s[8:11], 0x17 ; C20A0917 s_buffer_load_dword s14, s[8:11], 0x18 ; C2070918 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s3 ; 7E0E0203 v_mov_b32_e32 v8, s12 ; 7E10020C s_buffer_load_dword s15, s[8:11], 0x19 ; C2078919 v_mov_b32_e32 v9, s13 ; 7E12020D s_buffer_load_dword s16, s[8:11], 0x1a ; C208091A s_buffer_load_dword s18, s[8:11], 0x1c ; C209091C v_mov_b32_e32 v10, s17 ; 7E140211 s_buffer_load_dword s24, s[8:11], 0x36 ; C20C0936 v_mov_b32_e32 v11, s21 ; 7E160215 s_buffer_load_dword s25, s[8:11], 0x38 ; C20C8938 v_mov_b32_e32 v12, s22 ; 7E180216 s_buffer_load_dword s26, s[8:11], 0x39 ; C20D0939 v_interp_p1_f32 v13, v0, 2, 1, [m0] ; C8340600 v_interp_p2_f32 v13, [v13], v1, 2, 1, [m0] ; C8350601 v_interp_p1_f32 v14, v0, 3, 1, [m0] ; C8380700 v_interp_p2_f32 v14, [v14], v1, 3, 1, [m0] ; C8390701 v_mov_b32_e32 v15, 0xbf4ccccd ; 7E1E02FF BF4CCCCD v_mov_b32_e32 v16, 0x3e4ccccc ; 7E2002FF 3E4CCCCC v_mov_b32_e32 v17, 0xff7fffff ; 7E2202FF FF7FFFFF v_mov_b32_e32 v18, 0xb58637bd ; 7E2402FF B58637BD v_sub_f32_e64 v19, 1.0, s2 ; D2080013 000004F2 v_sub_f32_e64 v20, 1.0, s1 ; D2080014 000002F2 v_sub_f32_e64 v21, 1.0, s0 ; D2080015 000000F2 s_buffer_load_dword s21, s[8:11], 0x3a ; C20A893A s_buffer_load_dword s22, s[8:11], 0x3b ; C20B093B s_buffer_load_dword s23, s[8:11], 0x3c ; C20B893C s_buffer_load_dword s17, s[8:11], 0x3d ; C208893D s_buffer_load_dword s13, s[8:11], 0x40 ; C2068940 s_buffer_load_dword s12, s[8:11], 0x41 ; C2060941 s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942 s_buffer_load_dword s27, s[8:11], 0x34 ; C20D8934 s_buffer_load_dword s28, s[8:11], 0x35 ; C20E0935 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[52:55], s[4:5], 0x8 ; C09A0508 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v22, 1.0, s25 ; D2080016 000032F2 v_mul_f32_e32 v23, v5, v5 ; 102E0B05 v_mad_f32 v23, v6, v6, v23 ; D2820017 045E0D06 v_mad_f32 v23, v13, v13, v23 ; D2820017 045E1B0D v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_interp_p1_f32 v24, v0, 0, 2, [m0] ; C8600800 v_interp_p2_f32 v24, [v24], v1, 0, 2, [m0] ; C8610801 v_interp_p1_f32 v25, v0, 1, 2, [m0] ; C8640900 v_interp_p2_f32 v25, [v25], v1, 1, 2, [m0] ; C8650901 v_interp_p1_f32 v26, v0, 0, 3, [m0] ; C8680C00 v_interp_p2_f32 v26, [v26], v1, 0, 3, [m0] ; C8690C01 v_interp_p1_f32 v27, v0, 1, 3, [m0] ; C86C0D00 v_interp_p2_f32 v27, [v27], v1, 1, 3, [m0] ; C86D0D01 v_interp_p1_f32 v28, v0, 2, 3, [m0] ; C8700E00 v_interp_p2_f32 v28, [v28], v1, 2, 3, [m0] ; C8710E01 v_interp_p1_f32 v29, v0, 3, 4, [m0] ; C8741300 v_interp_p2_f32 v29, [v29], v1, 3, 4, [m0] ; C8751301 v_interp_p1_f32 v30, v0, 0, 5, [m0] ; C8781400 v_interp_p2_f32 v30, [v30], v1, 0, 5, [m0] ; C8791401 s_load_dwordx8 s[68:75], s[6:7], 0x10 ; C0E20710 s_load_dwordx4 s[40:43], s[4:5], 0xc ; C094050C s_load_dwordx8 s[60:67], s[6:7], 0x0 ; C0DE0700 s_load_dwordx8 s[44:51], s[6:7], 0x18 ; C0D60718 v_interp_p1_f32 v31, v0, 1, 5, [m0] ; C87C1500 v_interp_p2_f32 v31, [v31], v1, 1, 5, [m0] ; C87D1501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[32:34], 7, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[68:75], s[52:55] ; F0800700 01B12018 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[60:67], s[36:39] ; F0800700 012F2318 image_sample v[38:40], 7, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[44:51], s[40:43] ; F0800700 014B2618 s_waitcnt vmcnt(2) ; BF8C0772 v_mad_f32 v1, 2.0, v32, -1.0 ; D2820001 03CE40F4 v_mad_f32 v32, 2.0, v33, -1.0 ; D2820020 03CE42F4 v_mad_f32 v33, 2.0, v34, -1.0 ; D2820021 03CE44F4 v_mul_f32_e32 v34, v30, v30 ; 10443D1E v_mad_f32 v34, v31, v31, v34 ; D2820022 048A3F1F v_mad_f32 v34, v0, v0, v34 ; D2820022 048A0100 v_rsq_clamp_f32_e32 v34, v34 ; 7E445922 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mad_f32 v35, 2.0, v35, -1.0 ; D2820023 03CE46F4 v_mad_f32 v36, 2.0, v36, -1.0 ; D2820024 03CE48F4 v_mad_f32 v37, 2.0, v37, -1.0 ; D2820025 03CE4AF4 v_min_f32_e32 v34, 0x7f7fffff, v34 ; 1E4444FF 7F7FFFFF v_mul_f32_e32 v41, v34, v30 ; 10523D22 v_mul_f32_e32 v1, v41, v1 ; 10020329 v_mul_f32_e32 v42, v34, v31 ; 10543F22 v_mad_f32 v1, v32, v42, v1 ; D2820001 04065520 v_mul_f32_e32 v32, v34, v0 ; 10400122 v_mad_f32 v1, v33, v32, v1 ; D2820001 04064121 v_add_f32_e64 v33, 0, v1 clamp ; D2060821 00020280 v_add_f32_e32 v43, v18, v33 ; 06564312 v_cmp_gt_f32_e32 vcc, 0, v43 ; 7C085680 v_and_b32_e32 v33, 0x7fffffff, v33 ; 364242FF 7FFFFFFF v_log_f32_e32 v33, v33 ; 7E424F21 image_sample v[43:45], 7, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[52:59], s[32:35] ; F0800700 010D2B18 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v24, 0x3e99999a, v43 ; 103056FF 3E99999A v_madmk_f32_e32 v24, v44, v24, 0x3f170a3d ; 4030312C 3F170A3D v_madmk_f32_e32 v24, v45, v24, 0x3de147ae ; 4030312D 3DE147AE v_mul_f32_e32 v25, v43, v22 ; 10322D2B v_mad_f32 v25, s25, v24, v25 ; D2820019 04663019 v_mul_f32_e32 v46, v44, v22 ; 105C2D2C v_mad_f32 v46, s25, v24, v46 ; D282002E 04BA3019 v_mul_f32_e32 v22, v45, v22 ; 102C2D2D v_mad_f32 v22, s25, v24, v22 ; D2820016 045A3019 v_mul_legacy_f32_e32 v24, s26, v33 ; 0E30421A v_exp_f32_e32 v24, v24 ; 7E304B18 v_cndmask_b32_e64 v24, v24, 0, vcc ; D2000018 01A90118 v_mul_f32_e32 v33, s27, v24 ; 1042301B v_mad_f32 v25, v25, v33, -v43 ; D2820019 84AE4319 v_mul_f32_e32 v33, s28, v24 ; 1042301C v_mad_f32 v33, v46, v33, -v44 ; D2820021 84B2432E v_mul_f32_e32 v46, s24, v24 ; 105C3018 v_mad_f32 v22, v22, v46, -v45 ; D2820016 84B65D16 v_mul_f32_e32 v46, v35, v35 ; 105C4723 v_mad_f32 v46, v36, v36, v46 ; D282002E 04BA4924 v_mad_f32 v46, v37, v37, v46 ; D282002E 04BA4B25 v_rsq_clamp_f32_e32 v46, v46 ; 7E5C592E v_mad_f32 v25, v24, v25, v43 ; D2820019 04AE3318 v_mad_f32 v33, v24, v33, v44 ; D2820021 04B24318 v_mad_f32 v22, v24, v22, v45 ; D2820016 04B62D18 v_min_f32_e32 v24, 0x7f7fffff, v46 ; 1E305CFF 7F7FFFFF v_mul_f32_e32 v35, v24, v35 ; 10464718 v_mul_f32_e32 v36, v24, v36 ; 10484918 v_mul_f32_e32 v24, v24, v37 ; 10304B18 v_mul_f32_e32 v37, v41, v35 ; 104A4729 v_mad_f32 v37, v36, v42, v37 ; D2820025 04965524 v_mad_f32 v32, v24, v32, v37 ; D2820020 04964118 v_mul_f32_e32 v37, v35, v32 ; 104A4123 v_mad_f32 v37, v32, v35, v37 ; D2820025 04964720 v_mad_f32 v30, -v30, v34, v37 ; D282001E 2496451E v_mul_f32_e32 v37, v36, v32 ; 104A4124 v_mad_f32 v37, v32, v36, v37 ; D2820025 04964920 v_mad_f32 v31, -v31, v34, v37 ; D282001F 2496451F v_mul_f32_e32 v37, v26, v26 ; 104A351A v_mad_f32 v37, v27, v27, v37 ; D2820025 0496371B v_mad_f32 v37, v28, v28, v37 ; D2820025 0496391C v_rsq_clamp_f32_e32 v37, v37 ; 7E4A5925 v_mul_f32_e32 v41, v24, v32 ; 10524118 v_mad_f32 v32, v32, v24, v41 ; D2820020 04A63120 v_mad_f32 v0, -v0, v34, v32 ; D2820000 24824500 v_min_f32_e32 v32, 0x7f7fffff, v37 ; 1E404AFF 7F7FFFFF v_mul_f32_e32 v26, v32, v26 ; 10343520 v_mul_f32_e32 v27, v32, v27 ; 10363720 v_mul_f32_e32 v28, v32, v28 ; 10383920 v_mul_f32_e32 v30, v26, v30 ; 103C3D1A v_mad_f32 v30, v31, v27, v30 ; D282001E 047A371F v_mad_f32 v0, v0, v28, v30 ; D2820000 047A3900 v_max_f32_e32 v1, 0, v1 ; 20020280 v_add_f32_e32 v15, v1, v15 ; 061E1F01 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_log_f32_e64 v30, |v0| ; D34E011E 00000100 v_cmp_gt_f32_e32 vcc, 0, v15 ; 7C081E80 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_cndmask_b32_e64 v1, v16, v1, vcc ; D2000001 01AA0310 v_max_f32_e32 v15, v30, v17 ; 201E231E v_add_f32_e32 v0, v18, v0 ; 06000112 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_mul_f32_e32 v0, v1, v1 ; 10000301 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v1, 0x41a00000, v15 ; 10021EFF 41A00000 v_exp_f32_e32 v1, v1 ; 7E024B01 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v1, v38, v0 ; 10020126 v_mul_f32_e32 v16, v39, v0 ; 10200127 v_mul_f32_e32 v0, v40, v0 ; 10000128 v_mul_f32_e32 v17, s23, v38 ; 10224C17 v_mul_f32_e32 v30, s23, v39 ; 103C4E17 v_mul_f32_e32 v31, s23, v40 ; 103E5017 v_mad_f32 v1, s22, v1, v17 ; D2820001 04460216 v_mad_f32 v16, s22, v16, v30 ; D2820010 047A2016 v_mad_f32 v0, s22, v0, v31 ; D2820000 047E0016 v_mul_f32_e32 v1, s21, v1 ; 10020215 v_mad_f32 v1, s20, v1, v10 ; D2820001 042A0214 v_mul_f32_e32 v10, s21, v16 ; 10142015 v_mad_f32 v10, s20, v10, v11 ; D282000A 042E1414 v_mul_f32_e32 v0, s21, v0 ; 10000015 v_mad_f32 v0, s20, v0, v12 ; D2820000 04320014 v_mul_f32_e32 v11, s21, v25 ; 10163215 v_mul_f32_e32 v11, v19, v11 ; 10161713 v_mul_f32_e32 v12, s21, v33 ; 10184215 v_mul_f32_e32 v12, v20, v12 ; 10181914 v_mul_f32_e32 v16, s21, v22 ; 10202C15 v_mul_f32_e32 v16, v21, v16 ; 10202115 v_mad_f32 v7, s19, v11, v7 ; D2820007 041E1613 v_mad_f32 v8, s19, v12, v8 ; D2820008 04221813 v_mad_f32 v9, s19, v16, v9 ; D2820009 04262013 v_min_f32_e32 v11, 0x7f7fffff, v23 ; 1E162EFF 7F7FFFFF v_mul_f32_e32 v12, v2, v2 ; 10180502 v_mad_f32 v12, v3, v3, v12 ; D282000C 04320703 v_mad_f32 v12, v4, v4, v12 ; D282000C 04320904 v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C v_mul_f32_e32 v5, v11, v5 ; 100A0B0B v_mul_f32_e32 v6, v11, v6 ; 100C0D0B v_mul_f32_e32 v11, v11, v13 ; 10161B0B v_min_f32_e32 v12, 0x7f7fffff, v12 ; 1E1818FF 7F7FFFFF v_mul_f32_e32 v2, v12, v2 ; 1004050C v_mul_f32_e32 v3, v12, v3 ; 1006070C v_mul_f32_e32 v4, v12, v4 ; 1008090C v_mul_f32_e32 v12, v3, v11 ; 10181703 v_mad_f32 v12, v6, v4, -v12 ; D282000C 84320906 v_mul_f32_e32 v13, v35, v5 ; 101A0B23 v_mad_f32 v13, v6, v36, v13 ; D282000D 04364906 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v5, v3, -v6 ; D2820006 841A0705 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 v_mad_f32 v5, v11, v2, -v5 ; D2820005 8416050B v_mad_f32 v11, v11, v24, v13 ; D282000B 0436310B v_mul_f32_e32 v2, v35, v2 ; 10040523 v_mad_f32 v2, v3, v36, v2 ; D2820002 040A4903 v_mad_f32 v2, v4, v24, v2 ; D2820002 040A3104 v_mul_f32_e32 v3, v14, v12 ; 1006190E v_mul_f32_e32 v4, v14, v5 ; 10080B0E v_mul_f32_e32 v5, v14, v6 ; 100A0D0E v_mul_f32_e32 v3, v35, v3 ; 10060723 v_mad_f32 v3, v4, v36, v3 ; D2820003 040E4904 v_mad_f32 v3, v5, v24, v3 ; D2820003 040E3105 v_mul_f32_e32 v4, v11, v11 ; 1008170B v_mov_b32_e32 v5, 0x40400000 ; 7E0A02FF 40400000 v_mad_f32 v4, v5, v4, -1.0 ; D2820004 03CE0905 s_buffer_load_dword s4, s[8:11], 0x21 ; C2020921 s_buffer_load_dword s5, s[8:11], 0x20 ; C2028920 v_mul_f32_e32 v5, v11, v3 ; 100A070B v_mov_b32_e32 v6, 0xbf5bab79 ; 7E0C02FF BF5BAB79 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v12, v11, v2 ; 1018050B v_mul_f32_e32 v6, v6, v12 ; 100C1906 s_buffer_load_dword s6, s[8:11], 0x29 ; C2030929 s_buffer_load_dword s7, s[8:11], 0x28 ; C2038928 v_mul_f32_e32 v4, 0x3e7da73e, v4 ; 100808FF 3E7DA73E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s4, v4 ; 10180804 s_buffer_load_dword s4, s[8:11], 0x31 ; C2020931 s_buffer_load_dword s19, s[8:11], 0x30 ; C2098930 v_mad_f32 v12, s5, v5, v12 ; D282000C 04320A05 s_buffer_load_dword s5, s[8:11], 0x22 ; C2028922 s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A s_buffer_load_dword s21, s[8:11], 0x2b ; C20A892B v_mul_f32_e32 v13, s6, v4 ; 101A0806 v_mad_f32 v13, s7, v5, v13 ; D282000D 04360A07 s_buffer_load_dword s6, s[8:11], 0x32 ; C2030932 s_buffer_load_dword s7, s[8:11], 0x33 ; C2038933 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 v_mad_f32 v4, s19, v5, v4 ; D2820004 04120A13 s_buffer_load_dword s4, s[8:11], 0x23 ; C2020923 s_buffer_load_dword s19, s[8:11], 0x24 ; C2098924 s_buffer_load_dword s22, s[8:11], 0x25 ; C20B0925 s_buffer_load_dword s23, s[8:11], 0x26 ; C20B8926 v_mad_f32 v5, s5, v6, v12 ; D2820005 04320C05 v_mad_f32 v12, s20, v6, v13 ; D282000C 04360C14 v_mad_f32 v4, s6, v6, v4 ; D2820004 04120C06 v_mul_f32_e32 v6, v3, v3 ; 100C0703 v_mad_f32 v6, v2, v2, -v6 ; D2820006 841A0502 s_buffer_load_dword s5, s[8:11], 0x1d ; C202891D v_mul_f32_e32 v6, 0x3edbab79, v6 ; 100C0CFF 3EDBAB79 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v6, v5 ; D2820005 04160C04 v_mad_f32 v12, s21, v6, v12 ; D282000C 04320C15 v_mad_f32 v4, s7, v6, v4 ; D2820004 04120C07 v_mul_f32_e32 v6, 0x3f82fc67, v11 ; 100C16FF 3F82FC67 s_buffer_load_dword s4, s[8:11], 0x1e ; C202091E s_buffer_load_dword s6, s[8:11], 0x1f ; C203091F s_buffer_load_dword s7, s[8:11], 0x2d ; C203892D s_buffer_load_dword s20, s[8:11], 0x2c ; C20A092C v_mul_f32_e32 v11, s5, v6 ; 10160C05 v_mov_b32_e32 v13, 0xbf82fc67 ; 7E1A02FF BF82FC67 v_mul_f32_e32 v14, v13, v3 ; 101C070D v_mad_f32 v11, s18, v14, v11 ; D282000B 042E1C12 s_buffer_load_dword s5, s[8:11], 0x2e ; C202892E v_mul_f32_e32 v16, s22, v6 ; 10200C16 v_mad_f32 v16, s19, v14, v16 ; D2820010 04421C13 s_buffer_load_dword s18, s[8:11], 0x2f ; C209092F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s7, v6 ; 100C0C07 v_mad_f32 v6, s20, v14, v6 ; D2820006 041A1C14 v_mul_f32_e32 v3, v3, v2 ; 10060503 s_buffer_load_dword s7, s[8:11], 0x27 ; C2038927 v_mul_f32_e32 v2, v13, v2 ; 1004050D v_mad_f32 v11, s4, v2, v11 ; D282000B 042E0404 v_mad_f32 v13, s23, v2, v16 ; D282000D 04420417 v_mad_f32 v2, s5, v2, v6 ; D2820002 041A0405 s_buffer_load_dword s4, s[8:11], 0x44 ; C2020944 s_buffer_load_dword s5, s[8:11], 0x45 ; C2028945 s_buffer_load_dword s8, s[8:11], 0x46 ; C2040946 v_mul_f32_e32 v3, 0x3f5bab79, v3 ; 100606FF 3F5BAB79 v_mad_f32 v6, s6, v3, v11 ; D2820006 042E0606 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, s7, v3, v13 ; D282000B 04360607 v_mad_f32 v2, s18, v3, v2 ; D2820002 040A0612 v_mov_b32_e32 v3, 0x3f62dfd0 ; 7E0602FF 3F62DFD0 v_mad_f32 v6, s14, v3, v6 ; D2820006 041A060E v_mad_f32 v2, s16, v3, v2 ; D2820002 040A0610 v_mad_f32 v3, s15, v3, v11 ; D2820003 042E060F v_mul_f32_e32 v11, v26, v35 ; 1016471A v_mad_f32 v11, v36, v27, v11 ; D282000B 042E3724 v_mad_f32 v11, v24, v28, v11 ; D282000B 042E3918 v_mul_f32_e32 v13, s17, v15 ; 101A1E11 v_mov_b32_e32 v14, 0x41000000 ; 7E1C02FF 41000000 v_add_f32_e32 v14, s17, v14 ; 061C1C11 v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v13, v13, v14 ; 101A1D0D v_mul_f32_e32 v13, 0x3d22f983, v13 ; 101A1AFF 3D22F983 v_cndmask_b32_e64 v13, v13, 0, vcc ; D200000D 01A9010D v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_add_f32_e32 v14, v18, v11 ; 061C1712 v_cmp_gt_f32_e32 vcc, 0, v14 ; 7C081C80 v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_add_f32_e32 v5, v6, v5 ; 060A0B06 v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_mad_f32 v5, v7, v5, s2 ; D2820005 000A0B07 v_mul_f32_e32 v1, v13, v1 ; 1002030D v_mad_f32 v1, v7, v11, v1 ; D2820001 04061707 v_mad_f32 v1, v1, s13, v5 ; D2820001 04141B01 v_mul_f32_e32 v5, v13, v10 ; 100A150D v_mul_f32_e32 v0, v13, v0 ; 1000010D v_mad_f32 v5, v8, v11, v5 ; D2820005 04161708 v_mad_f32 v0, v9, v11, v0 ; D2820000 04021709 v_add_f32_e32 v2, v2, v4 ; 06040902 v_add_f32_e32 v3, v3, v12 ; 06061903 v_max_f32_e32 v3, 0, v3 ; 20060680 v_mad_f32 v3, v8, v3, s1 ; D2820003 00060708 v_mad_f32 v3, v5, s12, v3 ; D2820003 040C1905 v_max_f32_e32 v2, 0, v2 ; 20040480 v_mad_f32 v2, v9, v2, s0 ; D2820002 00020509 v_mad_f32 v0, v0, s3, v2 ; D2820000 04080700 v_mad_f32 v1, v7, s4, v1 ; D2820001 04040907 v_mad_f32 v2, v8, s5, v3 ; D2820002 040C0B08 v_mad_f32 v0, v9, s8, v0 ; D2820000 04001109 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, 0x3b800000, v29 ; 10043AFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 48 Code Size: 1980 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL IN[3], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..9] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..5] IMM[0] FLT32 { 2.0000, -1.0000, -0.3333, 1.0000} IMM[1] FLT32 { 0.8165, 0.5774, 0.0000, 0.0000} IMM[2] FLT32 { 0.0039, 0.0000, 340282346638528859811704183484516925440.0000, -340282346638528859811704183484516925440.0000} IMM[3] FLT32 { -0.7071, -0.4082, 0.5774, 0.7071} 0: TEX TEMP[0], IN[1], SAMP[5], 2D 1: ADD TEMP[0], TEMP[0].xxxx, IMM[0].zzzz 2: KILL_IF TEMP[0] 3: DP3 TEMP[1].x, IN[3], IN[3] 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MIN TEMP[1].x, IMM[2].zzzz, TEMP[1].xxxx 6: MUL TEMP[0].xyz, IN[3], TEMP[1].xxxx 7: TEX TEMP[2], IN[1], SAMP[2], 2D 8: MAD TEMP[2].xyz, TEMP[2], IMM[0].xxxx, IMM[0].yyyy 9: DP3 TEMP[1].x, TEMP[2], TEMP[2] 10: RSQ TEMP[1].x, TEMP[1].xxxx 11: MIN TEMP[1].x, IMM[2].zzzz, TEMP[1].xxxx 12: MUL TEMP[3].xyz, TEMP[2], TEMP[1].xxxx 13: DP3 TEMP[0].w, TEMP[3], TEMP[0] 14: MUL TEMP[2].xyz, TEMP[0].wwww, TEMP[3] 15: MAD TEMP[0].xyz, TEMP[2], IMM[0].xxxx, -TEMP[0] 16: DP2 TEMP[1].x, TEMP[0].yzzw, IMM[1] 17: ADD_SAT TEMP[2].x, IMM[1].zzzz, TEMP[1].xxxx 18: DP3_SAT TEMP[2].y, TEMP[0], IMM[3] 19: DP3_SAT TEMP[2].z, TEMP[0].yzxw, IMM[3].yzww 20: MAX TEMP[0].xyz, TEMP[2], IMM[1].wwww 21: LG2 TEMP[1].x, |TEMP[0].xxxx| 22: MAX TEMP[2].x, IMM[2].wwww, TEMP[1].xxxx 23: LG2 TEMP[1].x, |TEMP[0].yyyy| 24: MAX TEMP[2].y, IMM[2].wwww, TEMP[1].xxxx 25: LG2 TEMP[1].x, |TEMP[0].zzzz| 26: MAX TEMP[2].z, IMM[2].wwww, TEMP[1].xxxx 27: MOV TEMP[0].w, IMM[0].wwww 28: ADD TEMP[0].x, TEMP[0].wwww, CONST[8].xxxx 29: MUL TEMP[0].xyz, TEMP[2], TEMP[0].xxxx 30: EX2 TEMP[2].x, TEMP[0].xxxx 31: EX2 TEMP[2].y, TEMP[0].yyyy 32: EX2 TEMP[2].z, TEMP[0].zzzz 33: TEX TEMP[4], IN[0], SAMP[1], 2D 34: MUL TEMP[0].xyz, TEMP[4], CONST[7] 35: DP3 TEMP[2].x, TEMP[0], TEMP[2] 36: TEX TEMP[4], IN[1], SAMP[4], 2D 37: MAD TEMP[2].yzw, TEMP[4].yyyy, CONST[5].wwww, CONST[5].xxyz 38: TEX TEMP[4], IN[0], SAMP[0], 2D 39: MUL TEMP[4].xyz, TEMP[4], CONST[6] 40: MUL TEMP[2].yzw, TEMP[2], TEMP[4].xxyz 41: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[2].yzww 42: DP2 TEMP[1].x, TEMP[3].yzzw, IMM[1] 43: ADD_SAT TEMP[5].x, IMM[1].zzzz, TEMP[1].xxxx 44: DP3_SAT TEMP[5].y, TEMP[3], IMM[3] 45: DP3_SAT TEMP[5].z, TEMP[3].yzxw, IMM[3].yzww 46: MUL TEMP[3].xyz, TEMP[5], TEMP[5] 47: MAX TEMP[5].xyz, TEMP[3], IMM[1].wwww 48: DP3 TEMP[0].x, TEMP[0], TEMP[5] 49: ADD TEMP[0].yzw, TEMP[0].wwww, -CONST[0].xxyz 50: TEX TEMP[3], IN[1], SAMP[3], 2D 51: MUL TEMP[0].yzw, TEMP[0], TEMP[3].xxyz 52: MAD TEMP[0].yzw, TEMP[0], CONST[4].wwww, CONST[4].xxyz 53: MUL TEMP[3].xyz, TEMP[4], TEMP[0].yzww 54: MAD TEMP[2].xyz, TEMP[3], TEMP[0].xxxx, TEMP[2] 55: ADD TEMP[2].xyz, TEMP[2], CONST[0] 56: MAD OUT[0].xyz, TEMP[0].yzww, CONST[9], TEMP[2] 57: MUL OUT[0].w, IMM[2].xxxx, IN[2].wwww 58: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %45 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %46 = load <32 x i8>, <32 x i8> addrspace(2)* %45, align 32, !tbaa !0 %47 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %50 = bitcast <8 x i32> addrspace(2)* %49 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %53 = bitcast <4 x i32> addrspace(2)* %52 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %74 = bitcast <8 x i32> addrspace(2)* %73 to <32 x i8> addrspace(2)* %75 = load <32 x i8>, <32 x i8> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %77 = bitcast <4 x i32> addrspace(2)* %76 to <16 x i8> addrspace(2)* %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %87 = bitcast float %81 to i32 %88 = bitcast float %82 to i32 %89 = insertelement <2 x i32> undef, i32 %87, i32 0 %90 = insertelement <2 x i32> %89, i32 %88, i32 1 %91 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %75, <16 x i8> %78, i32 2) %92 = extractelement <4 x float> %91, i32 0 %93 = fadd float %92, 0xBFD554C980000000 %94 = fadd float %92, 0xBFD554C980000000 %95 = fadd float %92, 0xBFD554C980000000 %96 = fadd float %92, 0xBFD554C980000000 %97 = fcmp olt float %93, 0.000000e+00 %98 = fcmp olt float %94, 0.000000e+00 %99 = fcmp olt float %95, 0.000000e+00 %100 = fcmp olt float %96, 0.000000e+00 %101 = or i1 %100, %99 %102 = or i1 %101, %98 %103 = or i1 %102, %97 %104 = select i1 %103, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %104) %105 = fmul float %84, %84 %106 = fmul float %85, %85 %107 = fadd float %106, %105 %108 = fmul float %86, %86 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) %111 = call float @llvm.minnum.f32(float %110, float 0x47EFFFFFE0000000) %112 = fmul float %84, %111 %113 = fmul float %85, %111 %114 = fmul float %86, %111 %115 = bitcast float %81 to i32 %116 = bitcast float %82 to i32 %117 = insertelement <2 x i32> undef, i32 %115, i32 0 %118 = insertelement <2 x i32> %117, i32 %116, i32 1 %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %57, <16 x i8> %60, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = fmul float %120, 2.000000e+00 %124 = fadd float %123, -1.000000e+00 %125 = fmul float %121, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %122, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %124, %124 %130 = fmul float %126, %126 %131 = fadd float %130, %129 %132 = fmul float %128, %128 %133 = fadd float %131, %132 %134 = call float @llvm.AMDGPU.rsq.clamped.f32(float %133) %135 = call float @llvm.minnum.f32(float %134, float 0x47EFFFFFE0000000) %136 = fmul float %124, %135 %137 = fmul float %126, %135 %138 = fmul float %128, %135 %139 = fmul float %136, %112 %140 = fmul float %137, %113 %141 = fadd float %140, %139 %142 = fmul float %138, %114 %143 = fadd float %141, %142 %144 = fmul float %143, %136 %145 = fmul float %143, %137 %146 = fmul float %143, %138 %147 = fmul float %144, 2.000000e+00 %148 = fsub float %147, %112 %149 = fmul float %145, 2.000000e+00 %150 = fsub float %149, %113 %151 = fmul float %146, 2.000000e+00 %152 = fsub float %151, %114 %153 = fmul float %150, 0x3FEA20BD80000000 %154 = fmul float %152, 0x3FE279A740000000 %155 = fadd float %153, %154 %156 = fadd float %155, 0.000000e+00 %157 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00) %158 = fmul float %148, 0xBFE6A09E60000000 %159 = fmul float %150, 0xBFDA20BD80000000 %160 = fadd float %159, %158 %161 = fmul float %152, 0x3FE279A740000000 %162 = fadd float %160, %161 %163 = call float @llvm.AMDIL.clamp.(float %162, float 0.000000e+00, float 1.000000e+00) %164 = fmul float %150, 0xBFDA20BD80000000 %165 = fmul float %152, 0x3FE279A740000000 %166 = fadd float %165, %164 %167 = fmul float %148, 0x3FE6A09E60000000 %168 = fadd float %166, %167 %169 = call float @llvm.AMDIL.clamp.(float %168, float 0.000000e+00, float 1.000000e+00) %170 = call float @llvm.maxnum.f32(float %157, float 0x3EB0C6F7A0000000) %171 = call float @llvm.maxnum.f32(float %163, float 0x3EB0C6F7A0000000) %172 = call float @llvm.maxnum.f32(float %169, float 0x3EB0C6F7A0000000) %173 = call float @fabs(float %170) %174 = call float @llvm.log2.f32(float %173) %175 = call float @llvm.maxnum.f32(float %174, float 0xC7EFFFFFE0000000) %176 = call float @fabs(float %171) %177 = call float @llvm.log2.f32(float %176) %178 = call float @llvm.maxnum.f32(float %177, float 0xC7EFFFFFE0000000) %179 = call float @fabs(float %172) %180 = call float @llvm.log2.f32(float %179) %181 = call float @llvm.maxnum.f32(float %180, float 0xC7EFFFFFE0000000) %182 = fadd float %41, 1.000000e+00 %183 = fmul float %175, %182 %184 = fmul float %178, %182 %185 = fmul float %181, %182 %186 = call float @llvm.AMDIL.exp.(float %183) %187 = call float @llvm.AMDIL.exp.(float %184) %188 = call float @llvm.AMDIL.exp.(float %185) %189 = bitcast float %79 to i32 %190 = bitcast float %80 to i32 %191 = insertelement <2 x i32> undef, i32 %189, i32 0 %192 = insertelement <2 x i32> %191, i32 %190, i32 1 %193 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %192, <32 x i8> %51, <16 x i8> %54, i32 2) %194 = extractelement <4 x float> %193, i32 0 %195 = extractelement <4 x float> %193, i32 1 %196 = extractelement <4 x float> %193, i32 2 %197 = fmul float %194, %38 %198 = fmul float %195, %39 %199 = fmul float %196, %40 %200 = fmul float %197, %186 %201 = fmul float %198, %187 %202 = fadd float %201, %200 %203 = fmul float %199, %188 %204 = fadd float %202, %203 %205 = bitcast float %81 to i32 %206 = bitcast float %82 to i32 %207 = insertelement <2 x i32> undef, i32 %205, i32 0 %208 = insertelement <2 x i32> %207, i32 %206, i32 1 %209 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %208, <32 x i8> %69, <16 x i8> %72, i32 2) %210 = extractelement <4 x float> %209, i32 1 %211 = fmul float %210, %34 %212 = fadd float %211, %31 %213 = fmul float %210, %34 %214 = fadd float %213, %32 %215 = fmul float %210, %34 %216 = fadd float %215, %33 %217 = bitcast float %79 to i32 %218 = bitcast float %80 to i32 %219 = insertelement <2 x i32> undef, i32 %217, i32 0 %220 = insertelement <2 x i32> %219, i32 %218, i32 1 %221 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %220, <32 x i8> %46, <16 x i8> %48, i32 2) %222 = extractelement <4 x float> %221, i32 0 %223 = extractelement <4 x float> %221, i32 1 %224 = extractelement <4 x float> %221, i32 2 %225 = fmul float %222, %35 %226 = fmul float %223, %36 %227 = fmul float %224, %37 %228 = fmul float %212, %225 %229 = fmul float %214, %226 %230 = fmul float %216, %227 %231 = fmul float %204, %228 %232 = fmul float %204, %229 %233 = fmul float %204, %230 %234 = fmul float %137, 0x3FEA20BD80000000 %235 = fmul float %138, 0x3FE279A740000000 %236 = fadd float %234, %235 %237 = fadd float %236, 0.000000e+00 %238 = call float @llvm.AMDIL.clamp.(float %237, float 0.000000e+00, float 1.000000e+00) %239 = fmul float %136, 0xBFE6A09E60000000 %240 = fmul float %137, 0xBFDA20BD80000000 %241 = fadd float %240, %239 %242 = fmul float %138, 0x3FE279A740000000 %243 = fadd float %241, %242 %244 = call float @llvm.AMDIL.clamp.(float %243, float 0.000000e+00, float 1.000000e+00) %245 = fmul float %137, 0xBFDA20BD80000000 %246 = fmul float %138, 0x3FE279A740000000 %247 = fadd float %246, %245 %248 = fmul float %136, 0x3FE6A09E60000000 %249 = fadd float %247, %248 %250 = call float @llvm.AMDIL.clamp.(float %249, float 0.000000e+00, float 1.000000e+00) %251 = fmul float %238, %238 %252 = fmul float %244, %244 %253 = fmul float %250, %250 %254 = call float @llvm.maxnum.f32(float %251, float 0x3EB0C6F7A0000000) %255 = call float @llvm.maxnum.f32(float %252, float 0x3EB0C6F7A0000000) %256 = call float @llvm.maxnum.f32(float %253, float 0x3EB0C6F7A0000000) %257 = fmul float %197, %254 %258 = fmul float %198, %255 %259 = fadd float %258, %257 %260 = fmul float %199, %256 %261 = fadd float %259, %260 %262 = fsub float 1.000000e+00, %24 %263 = fsub float 1.000000e+00, %25 %264 = fsub float 1.000000e+00, %26 %265 = bitcast float %81 to i32 %266 = bitcast float %82 to i32 %267 = insertelement <2 x i32> undef, i32 %265, i32 0 %268 = insertelement <2 x i32> %267, i32 %266, i32 1 %269 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %268, <32 x i8> %63, <16 x i8> %66, i32 2) %270 = extractelement <4 x float> %269, i32 0 %271 = extractelement <4 x float> %269, i32 1 %272 = extractelement <4 x float> %269, i32 2 %273 = fmul float %262, %270 %274 = fmul float %263, %271 %275 = fmul float %264, %272 %276 = fmul float %273, %30 %277 = fadd float %276, %27 %278 = fmul float %274, %30 %279 = fadd float %278, %28 %280 = fmul float %275, %30 %281 = fadd float %280, %29 %282 = fmul float %225, %277 %283 = fmul float %226, %279 %284 = fmul float %227, %281 %285 = fmul float %282, %261 %286 = fadd float %285, %231 %287 = fmul float %283, %261 %288 = fadd float %287, %232 %289 = fmul float %284, %261 %290 = fadd float %289, %233 %291 = fadd float %286, %24 %292 = fadd float %288, %25 %293 = fadd float %290, %26 %294 = fmul float %277, %42 %295 = fadd float %294, %291 %296 = fmul float %279, %43 %297 = fadd float %296, %292 %298 = fmul float %281, %44 %299 = fadd float %298, %293 %300 = fmul float %83, 3.906250e-03 %301 = call i32 @llvm.SI.packf16(float %295, float %297) %302 = bitcast i32 %301 to float %303 = call i32 @llvm.SI.packf16(float %299, float %300) %304 = bitcast i32 %303 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %302, float %304, float %302, float %304) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 s_load_dwordx4 s[8:11], s[4:5], 0x14 ; C0840514 s_load_dwordx8 s[12:19], s[6:7], 0x28 ; C0C60728 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 3, 2, [m0] ; C8180B00 v_interp_p2_f32 v6, [v6], v1, 3, 2, [m0] ; C8190B01 v_interp_p1_f32 v7, v0, 0, 3, [m0] ; C81C0C00 v_interp_p2_f32 v7, [v7], v1, 0, 3, [m0] ; C81D0C01 v_interp_p1_f32 v8, v0, 1, 3, [m0] ; C8200D00 v_interp_p2_f32 v8, [v8], v1, 1, 3, [m0] ; C8210D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430104 v_mov_b32_e32 v9, 0xbeaaa64c ; 7E1202FF BEAAA64C s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v1, v1, v9 ; 06021301 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504 s_load_dwordx4 s[56:59], s[4:5], 0x8 ; C09C0508 s_load_dwordx8 s[60:67], s[6:7], 0x10 ; C0DE0710 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[48:55], s[6:7], 0x8 ; C0D80708 s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 v_cndmask_b32_e64 v1, v1, -1.0, vcc ; D2000001 01A9E701 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[60:67], s[56:59] ; F0800700 01CF0904 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[48:55], s[44:47] ; F0800700 016C0C02 image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[36:43], s[32:35] ; F0800200 01090104 image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[28:31] ; F0800700 00E50F02 s_buffer_load_dword s5, s[0:3], 0x1d ; C202811D s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E s_buffer_load_dword s7, s[0:3], 0x18 ; C2038118 s_buffer_load_dword s20, s[0:3], 0x19 ; C20A0119 s_buffer_load_dword s21, s[0:3], 0x1a ; C20A811A image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800700 00430204 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 v_mul_f32_e32 v5, s4, v12 ; 100A1804 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v12, s5, v13 ; 10181A05 v_mul_f32_e32 v13, s6, v14 ; 101A1C06 v_mul_f32_e32 v14, s7, v15 ; 101C1E07 v_mul_f32_e32 v15, s20, v16 ; 101E2014 v_mul_f32_e32 v16, s21, v17 ; 10202215 s_buffer_load_dword s5, s[0:3], 0x17 ; C2028117 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x14 ; C2058114 s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115 s_buffer_load_dword s13, s[0:3], 0x16 ; C2068116 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e64 v17, 1.0, s6 ; D2080011 00000CF2 v_mul_f32_e32 v2, v2, v17 ; 10042302 v_sub_f32_e64 v17, 1.0, s7 ; D2080011 00000EF2 v_mul_f32_e32 v3, v3, v17 ; 10062303 v_sub_f32_e64 v17, 1.0, s10 ; D2080011 000014F2 v_mul_f32_e32 v4, v4, v17 ; 10082304 v_mov_b32_e32 v17, s11 ; 7E22020B v_mov_b32_e32 v18, s12 ; 7E24020C v_mov_b32_e32 v19, s13 ; 7E26020D v_mad_f32 v17, s5, v1, v17 ; D2820011 04460205 v_mad_f32 v18, s5, v1, v18 ; D2820012 044A0205 v_mad_f32 v1, s5, v1, v19 ; D2820001 044E0205 v_mov_b32_e32 v19, s16 ; 7E260210 v_mad_f32 v2, s15, v2, v19 ; D2820002 044E040F v_mov_b32_e32 v19, s17 ; 7E260211 v_mad_f32 v3, s15, v3, v19 ; D2820003 044E060F v_mov_b32_e32 v19, s14 ; 7E26020E v_mad_f32 v4, s15, v4, v19 ; D2820004 044E080F v_mad_f32 v9, 2.0, v9, -1.0 ; D2820009 03CE12F4 v_mad_f32 v10, 2.0, v10, -1.0 ; D282000A 03CE14F4 v_mad_f32 v11, 2.0, v11, -1.0 ; D282000B 03CE16F4 v_mul_f32_e32 v19, v9, v9 ; 10261309 v_mad_f32 v19, v10, v10, v19 ; D2820013 044E150A v_mad_f32 v19, v11, v11, v19 ; D2820013 044E170B v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mul_f32_e32 v20, v7, v7 ; 10280F07 v_mad_f32 v20, v8, v8, v20 ; D2820014 04521108 v_mad_f32 v20, v0, v0, v20 ; D2820014 04520100 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_min_f32_e32 v19, 0x7f7fffff, v19 ; 1E2626FF 7F7FFFFF v_mul_f32_e32 v9, v19, v9 ; 10121313 v_mul_f32_e32 v10, v19, v10 ; 10141513 v_mul_f32_e32 v11, v19, v11 ; 10161713 v_min_f32_e32 v19, 0x7f7fffff, v20 ; 1E2628FF 7F7FFFFF v_mul_f32_e32 v20, v19, v7 ; 10280F13 v_mul_f32_e32 v20, v20, v9 ; 10281314 v_mul_f32_e32 v21, v19, v8 ; 102A1113 v_mad_f32 v20, v10, v21, v20 ; D2820014 04522B0A v_mul_f32_e32 v21, v19, v0 ; 102A0113 v_mad_f32 v20, v11, v21, v20 ; D2820014 04522B0B v_mul_f32_e32 v21, v9, v20 ; 102A2909 v_mad_f32 v21, v20, v9, v21 ; D2820015 04561314 v_mad_f32 v7, -v7, v19, v21 ; D2820007 24562707 v_mul_f32_e32 v21, v10, v20 ; 102A290A v_mad_f32 v21, v20, v10, v21 ; D2820015 04561514 v_mad_f32 v8, -v8, v19, v21 ; D2820008 24562708 v_mul_f32_e32 v21, v11, v20 ; 102A290B v_mad_f32 v20, v20, v11, v21 ; D2820014 04561714 v_mad_f32 v0, -v0, v19, v20 ; D2820000 24522700 v_mov_b32_e32 v19, 0x3f13cd3a ; 7E2602FF 3F13CD3A v_mad_f32 v20, v0, v19, 0 ; D2820014 02022700 v_mov_b32_e32 v21, 0x3f5105ec ; 7E2A02FF 3F5105EC v_mad_f32 v20, v8, v21, v20 ; D2820014 04522B08 v_mov_b32_e32 v22, 0xbf3504f3 ; 7E2C02FF BF3504F3 v_mul_f32_e32 v23, v22, v7 ; 102E0F16 v_mov_b32_e32 v24, 0xbed105ec ; 7E3002FF BED105EC v_mad_f32 v23, v8, v24, v23 ; D2820017 045E3108 v_mul_f32_e32 v8, v24, v8 ; 10101118 v_mad_f32 v23, v0, v19, v23 ; D2820017 045E2700 v_mad_f32 v0, v0, v19, v8 ; D2820000 04222700 v_mov_b32_e32 v8, 0x3f3504f3 ; 7E1002FF 3F3504F3 v_mad_f32 v0, v7, v8, v0 ; D2820000 04021107 v_add_f32_e64 v7, 0, v20 clamp ; D2060807 00022880 v_add_f32_e64 v20, 0, v23 clamp ; D2060814 00022E80 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_max_f32_e32 v7, 0x358637bd, v7 ; 200E0EFF 358637BD v_max_f32_e32 v20, 0x358637bd, v20 ; 202828FF 358637BD v_max_f32_e32 v0, 0x358637bd, v0 ; 200000FF 358637BD v_log_f32_e64 v7, |v7| ; D34E0107 00000107 v_log_f32_e64 v20, |v20| ; D34E0114 00000114 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mov_b32_e32 v23, 0xff7fffff ; 7E2E02FF FF7FFFFF v_max_f32_e32 v7, v23, v7 ; 200E0F17 v_max_f32_e32 v20, v23, v20 ; 20282917 v_max_f32_e32 v0, v23, v0 ; 20000117 v_add_f32_e64 v23, 1.0, s4 ; D2060017 000008F2 v_mul_f32_e32 v7, v23, v7 ; 100E0F17 v_mul_f32_e32 v20, v23, v20 ; 10282917 v_mul_f32_e32 v0, v23, v0 ; 10000117 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v7, v7, v5 ; 100E0B07 v_mad_f32 v7, v12, v20, v7 ; D2820007 041E290C v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v0, v13, v0, v7 ; D2820000 041E010D s_buffer_load_dword s0, s[0:3], 0x26 ; C2000126 v_mul_f32_e32 v7, v14, v17 ; 100E230E v_mad_f32 v7, v0, v7, s6 ; D2820007 001A0F00 v_mul_f32_e32 v17, v15, v18 ; 1022250F v_mad_f32 v17, v0, v17, s7 ; D2820011 001E2300 v_mul_f32_e32 v1, v16, v1 ; 10020310 v_mad_f32 v0, v0, v1, s10 ; D2820000 002A0300 v_mad_f32 v1, v11, v19, 0 ; D2820001 0202270B v_mad_f32 v1, v10, v21, v1 ; D2820001 04062B0A v_mul_f32_e32 v18, v22, v9 ; 10241316 v_mad_f32 v18, v10, v24, v18 ; D2820012 044A310A v_mul_f32_e32 v10, v24, v10 ; 10141518 v_mad_f32 v18, v11, v19, v18 ; D2820012 044A270B v_mad_f32 v10, v11, v19, v10 ; D282000A 042A270B v_mad_f32 v8, v9, v8, v10 ; D2820008 042A1109 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_max_f32_e32 v1, 0x358637bd, v1 ; 200202FF 358637BD v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_add_f32_e64 v5, 0, v18 clamp ; D2060805 00022480 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_max_f32_e32 v5, 0x358637bd, v5 ; 200A0AFF 358637BD v_mad_f32 v1, v12, v5, v1 ; D2820001 04060B0C v_add_f32_e64 v5, 0, v8 clamp ; D2060805 00021080 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_max_f32_e32 v5, 0x358637bd, v5 ; 200A0AFF 358637BD v_mad_f32 v1, v13, v5, v1 ; D2820001 04060B0D v_mul_f32_e32 v5, v2, v14 ; 100A1D02 v_mad_f32 v5, v5, v1, v7 ; D2820005 041E0305 v_mad_f32 v2, v2, s8, v5 ; D2820002 04141102 v_mul_f32_e32 v5, v3, v15 ; 100A1F03 v_mad_f32 v5, v5, v1, v17 ; D2820005 04460305 v_mad_f32 v3, v3, s9, v5 ; D2820003 04141303 v_mul_f32_e32 v5, v4, v16 ; 100A2104 v_mad_f32 v0, v5, v1, v0 ; D2820000 04020305 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s0, v0 ; D2820000 04000104 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_mul_f32_e32 v2, 0x3b800000, v6 ; 10040CFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 28 Code Size: 1160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[4], PERSPECTIVE DCL IN[1], TEXCOORD[5], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL IN[3], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..10] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..5] IMM[0] FLT32 { 0.3000, 0.5900, 0.1100, 0.0000} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, -0.0000} IMM[2] FLT32 { 15.0000, 0.9151, 0.0039, 0.0000} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[3], SAMP[0], 2D 1: MAD TEMP[0].xyz, TEMP[0], IMM[1].xxxx, IMM[1].yyyy 2: DP3 TEMP[1].x, IN[2], IN[2] 3: RSQ TEMP[1].x, TEMP[1].xxxx 4: MIN TEMP[1].x, IMM[3].xxxx, TEMP[1].xxxx 5: MUL TEMP[2].xyz, IN[2], TEMP[1].xxxx 6: DP3_SAT TEMP[0].w, TEMP[0], TEMP[2] 7: DP3 TEMP[1].x, TEMP[0], TEMP[0] 8: RSQ TEMP[1].x, TEMP[1].xxxx 9: MIN TEMP[1].x, IMM[3].xxxx, TEMP[1].xxxx 10: MUL TEMP[3].xyz, TEMP[0], TEMP[1].xxxx 11: ADD TEMP[0].x, TEMP[0].wwww, IMM[1].wwww 12: POW TEMP[2].w, |TEMP[0].wwww|, CONST[7].yyyy 13: CMP TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww, TEMP[2].wwww 14: MUL TEMP[0].yzw, TEMP[0].xxxx, CONST[6].xxyz 15: TEX TEMP[4], IN[3], SAMP[1], 2D 16: DP3 TEMP[2].w, TEMP[4], IMM[0] 17: LRP TEMP[5].xyz, CONST[7].xxxx, TEMP[2].wwww, TEMP[4] 18: MAD TEMP[0].yzw, TEMP[5].xxyz, TEMP[0], -TEMP[4].xxyz 19: TEX TEMP[5], IN[3], SAMP[2], 2D 20: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].yyyy 21: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[0].yzww, TEMP[4] 22: DP3 TEMP[0].w, TEMP[0], IMM[0] 23: LRP TEMP[4].xyz, CONST[7].zzzz, TEMP[0].wwww, TEMP[0] 24: MUL TEMP[0].xyz, TEMP[4], CONST[7].wwww 25: MOV TEMP[4].z, IMM[1].zzzz 26: ADD TEMP[4].xyz, TEMP[4].zzzz, -CONST[0] 27: MUL TEMP[0].xyz, TEMP[0], TEMP[4] 28: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 29: DP3 TEMP[0].w, TEMP[3], TEMP[2] 30: MUL TEMP[4].xyz, TEMP[0].wwww, TEMP[3] 31: MAD TEMP[2].xyz, TEMP[4], IMM[1].xxxx, -TEMP[2] 32: DP3 TEMP[1].x, IN[0], IN[0] 33: RSQ TEMP[1].x, TEMP[1].xxxx 34: MIN TEMP[1].x, IMM[3].xxxx, TEMP[1].xxxx 35: MUL TEMP[4].xyz, IN[0], TEMP[1].xxxx 36: DP3_SAT TEMP[0].w, TEMP[2], TEMP[4] 37: DP3_SAT TEMP[2].x, TEMP[3], TEMP[4] 38: ADD TEMP[2].y, TEMP[0].wwww, IMM[1].wwww 39: POW TEMP[2].z, |TEMP[0].wwww|, IMM[2].xxxx 40: MUL TEMP[0].w, TEMP[2].zzzz, IMM[2].yyyy 41: TEX TEMP[3], IN[3], SAMP[3], 2D 42: MUL TEMP[3].xyz, TEMP[3], CONST[8].xxxx 43: MUL TEMP[3].xyz, TEMP[3], CONST[7].wwww 44: MAD TEMP[3].xyz, TEMP[3], CONST[5].wwww, CONST[5] 45: MUL TEMP[3].xyz, TEMP[0].wwww, TEMP[3] 46: CMP TEMP[2].yzw, TEMP[2].yyyy, IMM[0].wwww, TEMP[3].xxyz 47: ADD TEMP[0].w, TEMP[2].xxxx, IMM[1].wwww 48: CMP TEMP[0].w, TEMP[0].wwww, IMM[0].wwww, TEMP[2].xxxx 49: MAD TEMP[2].xyz, TEMP[0], TEMP[0].wwww, TEMP[2].yzww 50: MOV TEMP[3].xyz, CONST[0] 51: MAD TEMP[2].xyz, TEMP[2], CONST[9], TEMP[3] 52: MAD OUT[0].xyz, TEMP[0], CONST[10], TEMP[2] 53: MUL OUT[0].w, IMM[2].zzzz, IN[1].wwww 54: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %49 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %80 = bitcast float %78 to i32 %81 = bitcast float %79 to i32 %82 = insertelement <2 x i32> undef, i32 %80, i32 0 %83 = insertelement <2 x i32> %82, i32 %81, i32 1 %84 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %83, <32 x i8> %50, <16 x i8> %52, i32 2) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = fmul float %85, 2.000000e+00 %89 = fadd float %88, -1.000000e+00 %90 = fmul float %86, 2.000000e+00 %91 = fadd float %90, -1.000000e+00 %92 = fmul float %87, 2.000000e+00 %93 = fadd float %92, -1.000000e+00 %94 = fmul float %75, %75 %95 = fmul float %76, %76 %96 = fadd float %95, %94 %97 = fmul float %77, %77 %98 = fadd float %96, %97 %99 = call float @llvm.AMDGPU.rsq.clamped.f32(float %98) %100 = call float @llvm.minnum.f32(float %99, float 0x47EFFFFFE0000000) %101 = fmul float %75, %100 %102 = fmul float %76, %100 %103 = fmul float %77, %100 %104 = fmul float %89, %101 %105 = fmul float %91, %102 %106 = fadd float %105, %104 %107 = fmul float %93, %103 %108 = fadd float %106, %107 %109 = call float @llvm.AMDIL.clamp.(float %108, float 0.000000e+00, float 1.000000e+00) %110 = fmul float %89, %89 %111 = fmul float %91, %91 %112 = fadd float %111, %110 %113 = fmul float %93, %93 %114 = fadd float %112, %113 %115 = call float @llvm.AMDGPU.rsq.clamped.f32(float %114) %116 = call float @llvm.minnum.f32(float %115, float 0x47EFFFFFE0000000) %117 = fmul float %89, %116 %118 = fmul float %91, %116 %119 = fmul float %93, %116 %120 = fadd float %109, 0xBEB0C6F7A0000000 %121 = call float @fabs(float %109) %122 = call float @llvm.pow.f32(float %121, float %39) %123 = call float @llvm.AMDGPU.cndlt(float %120, float 0.000000e+00, float %122) %124 = fmul float %123, %35 %125 = fmul float %123, %36 %126 = fmul float %123, %37 %127 = bitcast float %78 to i32 %128 = bitcast float %79 to i32 %129 = insertelement <2 x i32> undef, i32 %127, i32 0 %130 = insertelement <2 x i32> %129, i32 %128, i32 1 %131 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %130, <32 x i8> %55, <16 x i8> %58, i32 2) %132 = extractelement <4 x float> %131, i32 0 %133 = extractelement <4 x float> %131, i32 1 %134 = extractelement <4 x float> %131, i32 2 %135 = fmul float %132, 0x3FD3333340000000 %136 = fmul float %133, 0x3FE2E147A0000000 %137 = fadd float %136, %135 %138 = fmul float %134, 0x3FBC28F5C0000000 %139 = fadd float %137, %138 %140 = call float @llvm.AMDGPU.lrp(float %38, float %139, float %132) %141 = call float @llvm.AMDGPU.lrp(float %38, float %139, float %133) %142 = call float @llvm.AMDGPU.lrp(float %38, float %139, float %134) %143 = fmul float %140, %124 %144 = fsub float %143, %132 %145 = fmul float %141, %125 %146 = fsub float %145, %133 %147 = fmul float %142, %126 %148 = fsub float %147, %134 %149 = bitcast float %78 to i32 %150 = bitcast float %79 to i32 %151 = insertelement <2 x i32> undef, i32 %149, i32 0 %152 = insertelement <2 x i32> %151, i32 %150, i32 1 %153 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %152, <32 x i8> %61, <16 x i8> %64, i32 2) %154 = extractelement <4 x float> %153, i32 1 %155 = fmul float %123, %154 %156 = fmul float %155, %144 %157 = fadd float %156, %132 %158 = fmul float %155, %146 %159 = fadd float %158, %133 %160 = fmul float %155, %148 %161 = fadd float %160, %134 %162 = fmul float %157, 0x3FD3333340000000 %163 = fmul float %159, 0x3FE2E147A0000000 %164 = fadd float %163, %162 %165 = fmul float %161, 0x3FBC28F5C0000000 %166 = fadd float %164, %165 %167 = call float @llvm.AMDGPU.lrp(float %40, float %166, float %157) %168 = call float @llvm.AMDGPU.lrp(float %40, float %166, float %159) %169 = call float @llvm.AMDGPU.lrp(float %40, float %166, float %161) %170 = fmul float %167, %41 %171 = fmul float %168, %41 %172 = fmul float %169, %41 %173 = fsub float 1.000000e+00, %24 %174 = fsub float 1.000000e+00, %25 %175 = fsub float 1.000000e+00, %26 %176 = fmul float %170, %173 %177 = fmul float %171, %174 %178 = fmul float %172, %175 %179 = fmul float %176, %30 %180 = fadd float %179, %27 %181 = fmul float %177, %30 %182 = fadd float %181, %28 %183 = fmul float %178, %30 %184 = fadd float %183, %29 %185 = fmul float %117, %101 %186 = fmul float %118, %102 %187 = fadd float %186, %185 %188 = fmul float %119, %103 %189 = fadd float %187, %188 %190 = fmul float %189, %117 %191 = fmul float %189, %118 %192 = fmul float %189, %119 %193 = fmul float %190, 2.000000e+00 %194 = fsub float %193, %101 %195 = fmul float %191, 2.000000e+00 %196 = fsub float %195, %102 %197 = fmul float %192, 2.000000e+00 %198 = fsub float %197, %103 %199 = fmul float %71, %71 %200 = fmul float %72, %72 %201 = fadd float %200, %199 %202 = fmul float %73, %73 %203 = fadd float %201, %202 %204 = call float @llvm.AMDGPU.rsq.clamped.f32(float %203) %205 = call float @llvm.minnum.f32(float %204, float 0x47EFFFFFE0000000) %206 = fmul float %71, %205 %207 = fmul float %72, %205 %208 = fmul float %73, %205 %209 = fmul float %194, %206 %210 = fmul float %196, %207 %211 = fadd float %210, %209 %212 = fmul float %198, %208 %213 = fadd float %211, %212 %214 = call float @llvm.AMDIL.clamp.(float %213, float 0.000000e+00, float 1.000000e+00) %215 = fmul float %117, %206 %216 = fmul float %118, %207 %217 = fadd float %216, %215 %218 = fmul float %119, %208 %219 = fadd float %217, %218 %220 = call float @llvm.AMDIL.clamp.(float %219, float 0.000000e+00, float 1.000000e+00) %221 = fadd float %214, 0xBEB0C6F7A0000000 %222 = call float @fabs(float %214) %223 = call float @llvm.pow.f32(float %222, float 1.500000e+01) %224 = fmul float %223, 0x3FED48D5A0000000 %225 = bitcast float %78 to i32 %226 = bitcast float %79 to i32 %227 = insertelement <2 x i32> undef, i32 %225, i32 0 %228 = insertelement <2 x i32> %227, i32 %226, i32 1 %229 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %67, <16 x i8> %70, i32 2) %230 = extractelement <4 x float> %229, i32 0 %231 = extractelement <4 x float> %229, i32 1 %232 = extractelement <4 x float> %229, i32 2 %233 = fmul float %230, %42 %234 = fmul float %231, %42 %235 = fmul float %232, %42 %236 = fmul float %233, %41 %237 = fmul float %234, %41 %238 = fmul float %235, %41 %239 = fmul float %236, %34 %240 = fadd float %239, %31 %241 = fmul float %237, %34 %242 = fadd float %241, %32 %243 = fmul float %238, %34 %244 = fadd float %243, %33 %245 = fmul float %224, %240 %246 = fmul float %224, %242 %247 = fmul float %224, %244 %248 = call float @llvm.AMDGPU.cndlt(float %221, float 0.000000e+00, float %245) %249 = call float @llvm.AMDGPU.cndlt(float %221, float 0.000000e+00, float %246) %250 = call float @llvm.AMDGPU.cndlt(float %221, float 0.000000e+00, float %247) %251 = fadd float %220, 0xBEB0C6F7A0000000 %252 = call float @llvm.AMDGPU.cndlt(float %251, float 0.000000e+00, float %220) %253 = fmul float %180, %252 %254 = fadd float %253, %248 %255 = fmul float %182, %252 %256 = fadd float %255, %249 %257 = fmul float %184, %252 %258 = fadd float %257, %250 %259 = fmul float %254, %43 %260 = fadd float %259, %24 %261 = fmul float %256, %44 %262 = fadd float %261, %25 %263 = fmul float %258, %45 %264 = fadd float %263, %26 %265 = fmul float %180, %46 %266 = fadd float %265, %260 %267 = fmul float %182, %47 %268 = fadd float %267, %262 %269 = fmul float %184, %48 %270 = fadd float %269, %264 %271 = fmul float %74, 3.906250e-03 %272 = call i32 @llvm.SI.packf16(float %266, float %268) %273 = bitcast i32 %272 to float %274 = call i32 @llvm.SI.packf16(float %270, float %271) %275 = bitcast i32 %274 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %273, float %275, float %273, float %275) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[44:47], s[4:5], 0x0 ; C0960500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[20:23], s[4:5], 0x8 ; C08A0508 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx8 s[48:55], s[6:7], 0x0 ; C0D80700 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[48:55], s[44:47] ; F0800700 016C0B09 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[36:43], s[32:35] ; F0800700 01090E09 v_mul_f32_e32 v0, v6, v6 ; 10000D06 v_mad_f32 v0, v7, v7, v0 ; D2820000 04020F07 v_mad_f32 v0, v8, v8, v0 ; D2820000 04021108 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v1, 2.0, v11, -1.0 ; D2820001 03CE16F4 v_mad_f32 v11, 2.0, v12, -1.0 ; D282000B 03CE18F4 v_mad_f32 v12, 2.0, v13, -1.0 ; D282000C 03CE1AF4 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_mul_f32_e32 v13, v0, v6 ; 101A0D00 v_mul_f32_e32 v17, v0, v7 ; 10220F00 v_mul_f32_e32 v18, v13, v1 ; 1024030D v_mad_f32 v18, v11, v17, v18 ; D2820012 044A230B v_mul_f32_e32 v19, v0, v8 ; 10261100 v_mad_f32 v18, v12, v19, v18 ; D2820012 044A270C s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_mov_b32_e32 v20, 0xb58637bd ; 7E2802FF B58637BD v_add_f32_e32 v21, v20, v18 ; 062A2514 v_cmp_gt_f32_e32 vcc, 0, v21 ; 7C082A80 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, 0x3e99999a, v14 ; 102A1CFF 3E99999A v_mov_b32_e32 v22, 0x3f170a3d ; 7E2C02FF 3F170A3D v_mad_f32 v21, v15, v22, v21 ; D2820015 04562D0F v_mov_b32_e32 v23, 0x3de147ae ; 7E2E02FF 3DE147AE v_mad_f32 v21, v16, v23, v21 ; D2820015 04562F10 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v24, 1.0, s4 ; D2080018 000008F2 v_mul_f32_e32 v25, v14, v24 ; 1032310E v_mad_f32 v25, s4, v21, v25 ; D2820019 04662A04 v_mul_f32_e32 v26, v15, v24 ; 1034310F v_mad_f32 v26, s4, v21, v26 ; D282001A 046A2A04 v_mul_f32_e32 v24, v16, v24 ; 10303110 v_mad_f32 v21, s4, v21, v24 ; D2820015 04622A04 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D v_mov_b32_e32 v24, 0x7fffffff ; 7E3002FF 7FFFFFFF v_and_b32_e32 v18, v18, v24 ; 36243112 v_log_f32_e32 v18, v18 ; 7E244F12 s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_buffer_load_dword s6, s[0:3], 0x19 ; C2030119 s_buffer_load_dword s7, s[0:3], 0x1a ; C203811A s_buffer_load_dword s32, s[0:3], 0x1e ; C210011E s_buffer_load_dword s33, s[0:3], 0x1f ; C210811F s_buffer_load_dword s34, s[0:3], 0x20 ; C2110120 s_buffer_load_dword s35, s[0:3], 0x24 ; C2118124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v18, s4, v18 ; 0E242404 v_exp_f32_e32 v18, v18 ; 7E244B12 v_cndmask_b32_e64 v18, v18, 0, vcc ; D2000012 01A90112 v_mul_f32_e32 v27, s5, v18 ; 10362405 v_mad_f32 v25, v25, v27, -v14 ; D2820019 843A3719 v_mul_f32_e32 v27, s6, v18 ; 10362406 v_mad_f32 v26, v26, v27, -v15 ; D282001A 843E371A v_mul_f32_e32 v27, s7, v18 ; 10362407 v_mad_f32 v21, v21, v27, -v16 ; D2820015 84423715 image_sample v27, 2, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[24:31], s[20:23] ; F0800200 00A61B09 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, v27, v18 ; 1024251B v_mad_f32 v14, v18, v25, v14 ; D282000E 043A3312 v_mad_f32 v15, v18, v26, v15 ; D282000F 043E3512 v_mad_f32 v16, v18, v21, v16 ; D2820010 04422B12 image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800700 00431909 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, s34, v25 ; 10123222 v_mul_f32_e32 v10, s34, v26 ; 10143422 v_mul_f32_e32 v18, s34, v27 ; 10243622 v_mul_f32_e32 v21, 0x3e99999a, v14 ; 102A1CFF 3E99999A v_mad_f32 v21, v15, v22, v21 ; D2820015 04562D0F v_mad_f32 v21, v16, v23, v21 ; D2820015 04562F10 v_sub_f32_e64 v22, 1.0, s32 ; D2080016 000040F2 v_mul_f32_e32 v14, v14, v22 ; 101C2D0E v_mul_f32_e32 v15, v15, v22 ; 101E2D0F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 v_mul_f32_e32 v16, v16, v22 ; 10202D10 v_mad_f32 v14, s32, v21, v14 ; D282000E 043A2A20 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 v_mad_f32 v15, s32, v21, v15 ; D282000F 043E2A20 v_mad_f32 v16, s32, v21, v16 ; D2820010 04422A20 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 v_mul_f32_e32 v14, s33, v14 ; 101C1C21 s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v21, 1.0, s4 ; D2080015 000008F2 v_mul_f32_e32 v14, v21, v14 ; 101C1D15 v_mul_f32_e32 v15, s33, v15 ; 101E1E21 v_sub_f32_e64 v21, 1.0, s5 ; D2080015 00000AF2 v_mul_f32_e32 v15, v21, v15 ; 101E1F15 v_mul_f32_e32 v16, s33, v16 ; 10202021 v_sub_f32_e64 v21, 1.0, s6 ; D2080015 00000CF2 v_mul_f32_e32 v16, v21, v16 ; 10202115 s_buffer_load_dword s9, s[0:3], 0x13 ; C2048113 s_buffer_load_dword s10, s[0:3], 0x12 ; C2050112 v_mul_f32_e32 v21, v1, v1 ; 102A0301 v_mad_f32 v21, v11, v11, v21 ; D2820015 0456170B v_mad_f32 v21, v12, v12, v21 ; D2820015 0456190C v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 s_buffer_load_dword s11, s[0:3], 0x17 ; C2058117 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 s_buffer_load_dword s13, s[0:3], 0x15 ; C2068115 s_buffer_load_dword s14, s[0:3], 0x16 ; C2070116 v_mov_b32_e32 v22, s7 ; 7E2C0207 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v14, s9, v14, v22 ; D282000E 045A1C09 v_mov_b32_e32 v22, s8 ; 7E2C0208 v_mad_f32 v15, s9, v15, v22 ; D282000F 045A1E09 v_mov_b32_e32 v22, s10 ; 7E2C020A v_mad_f32 v16, s9, v16, v22 ; D2820010 045A2009 v_min_f32_e32 v21, 0x7f7fffff, v21 ; 1E2A2AFF 7F7FFFFF v_mul_f32_e32 v1, v21, v1 ; 10020315 v_mul_f32_e32 v11, v21, v11 ; 10161715 v_mul_f32_e32 v12, v21, v12 ; 10181915 v_mul_f32_e32 v9, s33, v9 ; 10121221 v_mov_b32_e32 v21, s12 ; 7E2A020C v_mad_f32 v9, s11, v9, v21 ; D2820009 0456120B v_mul_f32_e32 v10, s33, v10 ; 10141421 v_mov_b32_e32 v21, s13 ; 7E2A020D v_mad_f32 v10, s11, v10, v21 ; D282000A 0456140B v_mul_f32_e32 v18, s33, v18 ; 10242421 v_mov_b32_e32 v21, s14 ; 7E2A020E v_mad_f32 v18, s11, v18, v21 ; D2820012 0456240B s_buffer_load_dword s7, s[0:3], 0x25 ; C2038125 s_buffer_load_dword s8, s[0:3], 0x26 ; C2040126 s_buffer_load_dword s9, s[0:3], 0x28 ; C2048128 s_buffer_load_dword s10, s[0:3], 0x29 ; C2050129 s_buffer_load_dword s0, s[0:3], 0x2a ; C200012A v_mul_f32_e32 v13, v13, v1 ; 101A030D v_mad_f32 v13, v11, v17, v13 ; D282000D 0436230B v_mad_f32 v13, v12, v19, v13 ; D282000D 0436270C v_mul_f32_e32 v17, v1, v13 ; 10221B01 v_mad_f32 v17, v13, v1, v17 ; D2820011 0446030D v_mad_f32 v6, -v6, v0, v17 ; D2820006 24460106 v_mul_f32_e32 v17, v11, v13 ; 10221B0B v_mad_f32 v17, v13, v11, v17 ; D2820011 0446170D v_mad_f32 v7, -v7, v0, v17 ; D2820007 24460107 v_mul_f32_e32 v17, v2, v2 ; 10220502 v_mad_f32 v17, v3, v3, v17 ; D2820011 04460703 v_mad_f32 v17, v4, v4, v17 ; D2820011 04460904 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_mul_f32_e32 v19, v12, v13 ; 10261B0C v_mad_f32 v13, v13, v12, v19 ; D282000D 044E190D v_mad_f32 v0, -v8, v0, v13 ; D2820000 24360108 v_min_f32_e32 v8, 0x7f7fffff, v17 ; 1E1022FF 7F7FFFFF v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v8, v3 ; 10060708 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v7, v3, v6 ; D2820006 041A0707 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mad_f32 v1, v11, v3, v1 ; D2820001 0406070B v_mul_f32_e32 v2, v8, v4 ; 10040908 v_mad_f32 v0, v0, v2, v6 ; D2820000 041A0500 v_mad_f32 v1, v12, v2, v1 ; D2820001 0406050C v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_and_b32_e32 v2, v0, v24 ; 36043100 v_log_f32_e32 v2, v2 ; 7E044F02 v_add_f32_e32 v3, v20, v1 ; 06060314 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_add_f32_e32 v0, v20, v0 ; 06000114 v_mov_b32_e32 v3, s4 ; 7E060204 v_mul_legacy_f32_e32 v2, 0x41700000, v2 ; 0E0404FF 41700000 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v2, 0x3f6a46ad, v2 ; 100404FF 3F6A46AD v_mul_f32_e32 v4, v9, v2 ; 10080509 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v4, 0, vcc ; D2000000 01A90104 v_mad_f32 v0, v14, v1, v0 ; D2820000 0402030E v_mad_f32 v0, s35, v0, v3 ; D2820000 040E0023 v_mov_b32_e32 v3, s5 ; 7E060205 v_mul_f32_e32 v4, v10, v2 ; 1008050A v_mul_f32_e32 v2, v18, v2 ; 10040512 v_cndmask_b32_e64 v4, v4, 0, vcc ; D2000004 01A90104 v_mad_f32 v4, v15, v1, v4 ; D2820004 0412030F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s7, v4, v3 ; D2820003 040E0807 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_mad_f32 v1, v16, v1, v2 ; D2820001 040A0310 v_mov_b32_e32 v2, s6 ; 7E040206 v_mad_f32 v1, s8, v1, v2 ; D2820001 040A0208 v_mad_f32 v0, v14, s9, v0 ; D2820000 0400130E v_mad_f32 v2, v15, s10, v3 ; D2820002 040C150F v_mad_f32 v1, v16, s0, v1 ; D2820001 04040110 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_mul_f32_e32 v2, 0x3b800000, v5 ; 10040AFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 28 Code Size: 1216 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[4], PERSPECTIVE DCL IN[1], TEXCOORD[5], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL IN[3], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..10] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..7] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.8000, 15.0000, 0.9151, 0.0039} IMM[2] FLT32 { -0.0000, 0.3000, 0.5900, 0.1100} IMM[3] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[3], SAMP[0], 2D 1: MAD TEMP[0].xyz, TEMP[0], IMM[0].xxxx, IMM[0].yyyy 2: DP3 TEMP[1].x, IN[2], IN[2] 3: RSQ TEMP[1].x, TEMP[1].xxxx 4: MIN TEMP[1].x, IMM[3].xxxx, TEMP[1].xxxx 5: MUL TEMP[2].xyz, IN[2], TEMP[1].xxxx 6: DP3 TEMP[0].w, TEMP[0], TEMP[2] 7: MAX TEMP[2].w, TEMP[0].wwww, IMM[0].wwww 8: MOV_SAT TEMP[0].w, TEMP[2].wwww 9: ADD TEMP[2].w, -TEMP[2].wwww, IMM[0].zzzz 10: ADD TEMP[3].x, TEMP[0].wwww, IMM[2].xxxx 11: POW TEMP[3].y, |TEMP[0].wwww|, CONST[7].zzzz 12: CMP TEMP[0].w, TEMP[3].xxxx, IMM[0].wwww, TEMP[3].yyyy 13: MUL TEMP[3].xyz, TEMP[0].wwww, CONST[6] 14: TEX TEMP[4], IN[3], SAMP[2], 2D 15: DP3 TEMP[3].w, TEMP[4], IMM[2].yzww 16: LRP TEMP[5].xyz, CONST[7].yyyy, TEMP[3].wwww, TEMP[4] 17: LRP TEMP[6].xyz, CONST[7].wwww, TEMP[3].wwww, TEMP[4] 18: ADD TEMP[6].xyz, TEMP[6], TEMP[6] 19: MAX TEMP[7].xyz, |TEMP[6]|, -IMM[2].xxxx 20: MAD TEMP[3].xyz, TEMP[5], TEMP[3], -TEMP[4] 21: TEX TEMP[5], IN[3], SAMP[1], 2D 22: MUL TEMP[0].w, TEMP[0].wwww, TEMP[5].yyyy 23: MAD TEMP[3].xyz, TEMP[0].wwww, TEMP[3], TEMP[4] 24: MAD TEMP[5].xyw, TEMP[7].xyzz, TEMP[7].xyzz, -TEMP[3].xyzz 25: DP3 TEMP[1].x, TEMP[0], TEMP[0] 26: RSQ TEMP[1].x, TEMP[1].xxxx 27: MIN TEMP[1].x, IMM[3].xxxx, TEMP[1].xxxx 28: MUL TEMP[6].xyz, TEMP[0], TEMP[1].xxxx 29: DP3 TEMP[0].w, TEMP[6], TEMP[2] 30: MUL TEMP[7].xyz, TEMP[0].wwww, TEMP[6] 31: MAD TEMP[2].xyz, TEMP[7], IMM[0].xxxx, -TEMP[2] 32: DP3 TEMP[0].x, TEMP[2], TEMP[0] 33: MAX TEMP[3].w, TEMP[0].xxxx, IMM[0].wwww 34: MIN TEMP[0].x, TEMP[3].wwww, IMM[1].xxxx 35: MUL TEMP[0].x, TEMP[0].xxxx, CONST[8].xxxx 36: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[5].zzzz 37: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[5].xyww, TEMP[3] 38: MUL TEMP[0].w, |TEMP[2].wwww|, |TEMP[2].wwww| 39: MUL TEMP[0].w, TEMP[0].wwww, |TEMP[2].wwww| 40: ADD TEMP[2].w, |TEMP[2].wwww|, IMM[2].xxxx 41: MUL TEMP[0].w, TEMP[0].wwww, CONST[7].xxxx 42: MUL TEMP[0].w, TEMP[5].zzzz, TEMP[0].wwww 43: MUL TEMP[3].xyz, TEMP[4], TEMP[0].wwww 44: CMP TEMP[3].xyz, TEMP[2].wwww, IMM[0].wwww, TEMP[3] 45: ADD TEMP[0].xyz, TEMP[0], TEMP[3] 46: DP3 TEMP[0].w, TEMP[0], IMM[2].yzww 47: LRP TEMP[3].xyz, CONST[8].yyyy, TEMP[0].wwww, TEMP[0] 48: MUL TEMP[0].xyz, TEMP[3], CONST[8].zzzz 49: MOV TEMP[3].z, IMM[0].zzzz 50: ADD TEMP[3].xyz, TEMP[3].zzzz, -CONST[0] 51: MUL TEMP[0].xyz, TEMP[0], TEMP[3] 52: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 53: TEX TEMP[3], IN[3], SAMP[3], 2D 54: MUL TEMP[3].xyz, TEMP[3], CONST[8].wwww 55: MUL TEMP[3].xyz, TEMP[3], CONST[8].zzzz 56: MAD TEMP[3].xyz, TEMP[3], CONST[5].wwww, CONST[5] 57: DP3 TEMP[1].x, IN[0], IN[0] 58: RSQ TEMP[1].x, TEMP[1].xxxx 59: MIN TEMP[1].x, IMM[3].xxxx, TEMP[1].xxxx 60: MUL TEMP[4].xyz, IN[0], TEMP[1].xxxx 61: DP3_SAT TEMP[0].w, TEMP[2], TEMP[4] 62: DP3_SAT TEMP[2].x, TEMP[6], TEMP[4] 63: POW TEMP[2].y, |TEMP[0].wwww|, IMM[1].yyyy 64: ADD TEMP[0].w, TEMP[0].wwww, IMM[2].xxxx 65: MUL TEMP[2].y, TEMP[2].yyyy, IMM[1].zzzz 66: MUL TEMP[2].yzw, TEMP[3].xxyz, TEMP[2].yyyy 67: CMP TEMP[2].yzw, TEMP[0].wwww, IMM[0].wwww, TEMP[2] 68: ADD TEMP[0].w, TEMP[2].xxxx, IMM[2].xxxx 69: CMP TEMP[0].w, TEMP[0].wwww, IMM[0].wwww, TEMP[2].xxxx 70: MAD TEMP[2].xyz, TEMP[0], TEMP[0].wwww, TEMP[2].yzww 71: MOV TEMP[3].xyz, CONST[0] 72: MAD TEMP[2].xyz, TEMP[2], CONST[9], TEMP[3] 73: MAD OUT[0].xyz, TEMP[0], CONST[10], TEMP[2] 74: MUL OUT[0].w, IMM[1].wwww, IN[1].wwww 75: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %52 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0 %54 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %57 = bitcast <8 x i32> addrspace(2)* %56 to <32 x i8> addrspace(2)* %58 = load <32 x i8>, <32 x i8> addrspace(2)* %57, align 32, !tbaa !0 %59 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %60 = bitcast <4 x i32> addrspace(2)* %59 to <16 x i8> addrspace(2)* %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)* %64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)* %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %69 = bitcast <8 x i32> addrspace(2)* %68 to <32 x i8> addrspace(2)* %70 = load <32 x i8>, <32 x i8> addrspace(2)* %69, align 32, !tbaa !0 %71 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %72 = bitcast <4 x i32> addrspace(2)* %71 to <16 x i8> addrspace(2)* %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %83 = bitcast float %81 to i32 %84 = bitcast float %82 to i32 %85 = insertelement <2 x i32> undef, i32 %83, i32 0 %86 = insertelement <2 x i32> %85, i32 %84, i32 1 %87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %86, <32 x i8> %53, <16 x i8> %55, i32 2) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = extractelement <4 x float> %87, i32 2 %91 = fmul float %88, 2.000000e+00 %92 = fadd float %91, -1.000000e+00 %93 = fmul float %89, 2.000000e+00 %94 = fadd float %93, -1.000000e+00 %95 = fmul float %90, 2.000000e+00 %96 = fadd float %95, -1.000000e+00 %97 = fmul float %78, %78 %98 = fmul float %79, %79 %99 = fadd float %98, %97 %100 = fmul float %80, %80 %101 = fadd float %99, %100 %102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101) %103 = call float @llvm.minnum.f32(float %102, float 0x47EFFFFFE0000000) %104 = fmul float %78, %103 %105 = fmul float %79, %103 %106 = fmul float %80, %103 %107 = fmul float %92, %104 %108 = fmul float %94, %105 %109 = fadd float %108, %107 %110 = fmul float %96, %106 %111 = fadd float %109, %110 %112 = call float @llvm.maxnum.f32(float %111, float 0.000000e+00) %113 = call float @llvm.AMDIL.clamp.(float %112, float 0.000000e+00, float 1.000000e+00) %114 = fsub float 1.000000e+00, %112 %115 = fadd float %113, 0xBEB0C6F7A0000000 %116 = call float @fabs(float %113) %117 = call float @llvm.pow.f32(float %116, float %40) %118 = call float @llvm.AMDGPU.cndlt(float %115, float 0.000000e+00, float %117) %119 = fmul float %118, %35 %120 = fmul float %118, %36 %121 = fmul float %118, %37 %122 = bitcast float %81 to i32 %123 = bitcast float %82 to i32 %124 = insertelement <2 x i32> undef, i32 %122, i32 0 %125 = insertelement <2 x i32> %124, i32 %123, i32 1 %126 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %125, <32 x i8> %64, <16 x i8> %67, i32 2) %127 = extractelement <4 x float> %126, i32 0 %128 = extractelement <4 x float> %126, i32 1 %129 = extractelement <4 x float> %126, i32 2 %130 = fmul float %127, 0x3FD3333340000000 %131 = fmul float %128, 0x3FE2E147A0000000 %132 = fadd float %131, %130 %133 = fmul float %129, 0x3FBC28F5C0000000 %134 = fadd float %132, %133 %135 = call float @llvm.AMDGPU.lrp(float %39, float %134, float %127) %136 = call float @llvm.AMDGPU.lrp(float %39, float %134, float %128) %137 = call float @llvm.AMDGPU.lrp(float %39, float %134, float %129) %138 = call float @llvm.AMDGPU.lrp(float %41, float %134, float %127) %139 = call float @llvm.AMDGPU.lrp(float %41, float %134, float %128) %140 = call float @llvm.AMDGPU.lrp(float %41, float %134, float %129) %141 = fadd float %138, %138 %142 = fadd float %139, %139 %143 = fadd float %140, %140 %144 = call float @fabs(float %141) %145 = call float @llvm.maxnum.f32(float %144, float 0x3EB0C6F7A0000000) %146 = call float @fabs(float %142) %147 = call float @llvm.maxnum.f32(float %146, float 0x3EB0C6F7A0000000) %148 = call float @fabs(float %143) %149 = call float @llvm.maxnum.f32(float %148, float 0x3EB0C6F7A0000000) %150 = fmul float %135, %119 %151 = fsub float %150, %127 %152 = fmul float %136, %120 %153 = fsub float %152, %128 %154 = fmul float %137, %121 %155 = fsub float %154, %129 %156 = bitcast float %81 to i32 %157 = bitcast float %82 to i32 %158 = insertelement <2 x i32> undef, i32 %156, i32 0 %159 = insertelement <2 x i32> %158, i32 %157, i32 1 %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %58, <16 x i8> %61, i32 2) %161 = extractelement <4 x float> %160, i32 1 %162 = extractelement <4 x float> %160, i32 2 %163 = fmul float %118, %161 %164 = fmul float %163, %151 %165 = fadd float %164, %127 %166 = fmul float %163, %153 %167 = fadd float %166, %128 %168 = fmul float %163, %155 %169 = fadd float %168, %129 %170 = fmul float %145, %145 %171 = fsub float %170, %165 %172 = fmul float %147, %147 %173 = fsub float %172, %167 %174 = fmul float %149, %149 %175 = fsub float %174, %169 %176 = fmul float %92, %92 %177 = fmul float %94, %94 %178 = fadd float %177, %176 %179 = fmul float %96, %96 %180 = fadd float %178, %179 %181 = call float @llvm.AMDGPU.rsq.clamped.f32(float %180) %182 = call float @llvm.minnum.f32(float %181, float 0x47EFFFFFE0000000) %183 = fmul float %92, %182 %184 = fmul float %94, %182 %185 = fmul float %96, %182 %186 = fmul float %183, %104 %187 = fmul float %184, %105 %188 = fadd float %187, %186 %189 = fmul float %185, %106 %190 = fadd float %188, %189 %191 = fmul float %190, %183 %192 = fmul float %190, %184 %193 = fmul float %190, %185 %194 = fmul float %191, 2.000000e+00 %195 = fsub float %194, %104 %196 = fmul float %192, 2.000000e+00 %197 = fsub float %196, %105 %198 = fmul float %193, 2.000000e+00 %199 = fsub float %198, %106 %200 = fmul float %195, %92 %201 = fmul float %197, %94 %202 = fadd float %201, %200 %203 = fmul float %199, %96 %204 = fadd float %202, %203 %205 = call float @llvm.maxnum.f32(float %204, float 0.000000e+00) %206 = call float @llvm.minnum.f32(float %205, float 0x3FE99999A0000000) %207 = fmul float %206, %42 %208 = fmul float %207, %162 %209 = fmul float %208, %171 %210 = fadd float %209, %165 %211 = fmul float %208, %173 %212 = fadd float %211, %167 %213 = fmul float %208, %175 %214 = fadd float %213, %169 %215 = call float @fabs(float %114) %216 = call float @fabs(float %114) %217 = fmul float %215, %216 %218 = call float @fabs(float %114) %219 = fmul float %217, %218 %220 = call float @fabs(float %114) %221 = fadd float %220, 0xBEB0C6F7A0000000 %222 = fmul float %219, %38 %223 = fmul float %162, %222 %224 = fmul float %127, %223 %225 = fmul float %128, %223 %226 = fmul float %129, %223 %227 = call float @llvm.AMDGPU.cndlt(float %221, float 0.000000e+00, float %224) %228 = call float @llvm.AMDGPU.cndlt(float %221, float 0.000000e+00, float %225) %229 = call float @llvm.AMDGPU.cndlt(float %221, float 0.000000e+00, float %226) %230 = fadd float %210, %227 %231 = fadd float %212, %228 %232 = fadd float %214, %229 %233 = fmul float %230, 0x3FD3333340000000 %234 = fmul float %231, 0x3FE2E147A0000000 %235 = fadd float %234, %233 %236 = fmul float %232, 0x3FBC28F5C0000000 %237 = fadd float %235, %236 %238 = call float @llvm.AMDGPU.lrp(float %43, float %237, float %230) %239 = call float @llvm.AMDGPU.lrp(float %43, float %237, float %231) %240 = call float @llvm.AMDGPU.lrp(float %43, float %237, float %232) %241 = fmul float %238, %44 %242 = fmul float %239, %44 %243 = fmul float %240, %44 %244 = fsub float 1.000000e+00, %24 %245 = fsub float 1.000000e+00, %25 %246 = fsub float 1.000000e+00, %26 %247 = fmul float %241, %244 %248 = fmul float %242, %245 %249 = fmul float %243, %246 %250 = fmul float %247, %30 %251 = fadd float %250, %27 %252 = fmul float %248, %30 %253 = fadd float %252, %28 %254 = fmul float %249, %30 %255 = fadd float %254, %29 %256 = bitcast float %81 to i32 %257 = bitcast float %82 to i32 %258 = insertelement <2 x i32> undef, i32 %256, i32 0 %259 = insertelement <2 x i32> %258, i32 %257, i32 1 %260 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %259, <32 x i8> %70, <16 x i8> %73, i32 2) %261 = extractelement <4 x float> %260, i32 0 %262 = extractelement <4 x float> %260, i32 1 %263 = extractelement <4 x float> %260, i32 2 %264 = fmul float %261, %45 %265 = fmul float %262, %45 %266 = fmul float %263, %45 %267 = fmul float %264, %44 %268 = fmul float %265, %44 %269 = fmul float %266, %44 %270 = fmul float %267, %34 %271 = fadd float %270, %31 %272 = fmul float %268, %34 %273 = fadd float %272, %32 %274 = fmul float %269, %34 %275 = fadd float %274, %33 %276 = fmul float %74, %74 %277 = fmul float %75, %75 %278 = fadd float %277, %276 %279 = fmul float %76, %76 %280 = fadd float %278, %279 %281 = call float @llvm.AMDGPU.rsq.clamped.f32(float %280) %282 = call float @llvm.minnum.f32(float %281, float 0x47EFFFFFE0000000) %283 = fmul float %74, %282 %284 = fmul float %75, %282 %285 = fmul float %76, %282 %286 = fmul float %195, %283 %287 = fmul float %197, %284 %288 = fadd float %287, %286 %289 = fmul float %199, %285 %290 = fadd float %288, %289 %291 = call float @llvm.AMDIL.clamp.(float %290, float 0.000000e+00, float 1.000000e+00) %292 = fmul float %183, %283 %293 = fmul float %184, %284 %294 = fadd float %293, %292 %295 = fmul float %185, %285 %296 = fadd float %294, %295 %297 = call float @llvm.AMDIL.clamp.(float %296, float 0.000000e+00, float 1.000000e+00) %298 = call float @fabs(float %291) %299 = call float @llvm.pow.f32(float %298, float 1.500000e+01) %300 = fadd float %291, 0xBEB0C6F7A0000000 %301 = fmul float %299, 0x3FED48D5A0000000 %302 = fmul float %271, %301 %303 = fmul float %273, %301 %304 = fmul float %275, %301 %305 = call float @llvm.AMDGPU.cndlt(float %300, float 0.000000e+00, float %302) %306 = call float @llvm.AMDGPU.cndlt(float %300, float 0.000000e+00, float %303) %307 = call float @llvm.AMDGPU.cndlt(float %300, float 0.000000e+00, float %304) %308 = fadd float %297, 0xBEB0C6F7A0000000 %309 = call float @llvm.AMDGPU.cndlt(float %308, float 0.000000e+00, float %297) %310 = fmul float %251, %309 %311 = fadd float %310, %305 %312 = fmul float %253, %309 %313 = fadd float %312, %306 %314 = fmul float %255, %309 %315 = fadd float %314, %307 %316 = fmul float %311, %46 %317 = fadd float %316, %24 %318 = fmul float %313, %47 %319 = fadd float %318, %25 %320 = fmul float %315, %48 %321 = fadd float %320, %26 %322 = fmul float %251, %49 %323 = fadd float %322, %317 %324 = fmul float %253, %50 %325 = fadd float %324, %319 %326 = fmul float %255, %51 %327 = fadd float %326, %321 %328 = fmul float %77, 3.906250e-03 %329 = call i32 @llvm.SI.packf16(float %323, float %325) %330 = bitcast i32 %329 to float %331 = call i32 @llvm.SI.packf16(float %327, float %328) %332 = bitcast i32 %331 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %330, float %332, float %330, float %332) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[44:47], s[4:5], 0x0 ; C0960500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx4 s[32:35], s[4:5], 0x8 ; C0900508 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx8 s[48:55], s[6:7], 0x0 ; C0D80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[48:55], s[44:47] ; F0800700 016C0B09 v_mul_f32_e32 v0, v6, v6 ; 10000D06 v_mad_f32 v0, v7, v7, v0 ; D2820000 04020F07 v_mad_f32 v0, v8, v8, v0 ; D2820000 04021108 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, 2.0, v11, -1.0 ; D2820001 03CE16F4 v_mad_f32 v11, 2.0, v12, -1.0 ; D282000B 03CE18F4 v_mad_f32 v12, 2.0, v13, -1.0 ; D282000C 03CE1AF4 v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_mul_f32_e32 v13, v0, v6 ; 101A0D00 v_mul_f32_e32 v14, v0, v7 ; 101C0F00 v_mul_f32_e32 v15, v13, v1 ; 101E030D v_mad_f32 v15, v11, v14, v15 ; D282000F 043E1D0B v_mul_f32_e32 v16, v0, v8 ; 10201100 v_mad_f32 v15, v12, v16, v15 ; D282000F 043E210C v_max_f32_e32 v15, 0, v15 ; 201E1E80 v_add_f32_e64 v17, 0, v15 clamp ; D2060811 00021E80 v_mov_b32_e32 v18, 0xb58637bd ; 7E2402FF B58637BD v_add_f32_e32 v19, v18, v17 ; 06262312 v_mov_b32_e32 v20, 0x7fffffff ; 7E2802FF 7FFFFFFF v_and_b32_e32 v17, v17, v20 ; 36222911 v_log_f32_e32 v17, v17 ; 7E224F11 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F v_cmp_gt_f32_e32 vcc, 0, v19 ; 7C082680 image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[36:43], s[32:35] ; F0800700 01091509 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v19, 0x3e99999a, v21 ; 10262AFF 3E99999A v_mov_b32_e32 v24, 0x3f170a3d ; 7E3002FF 3F170A3D v_mad_f32 v19, v22, v24, v19 ; D2820013 044E3116 v_mov_b32_e32 v25, 0x3de147ae ; 7E3202FF 3DE147AE v_mad_f32 v19, v23, v25, v19 ; D2820013 044E3317 s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_buffer_load_dword s6, s[0:3], 0x21 ; C2030121 v_sub_f32_e64 v26, 1.0, s4 ; D208001A 000008F2 v_mul_f32_e32 v27, v21, v26 ; 10363515 v_mad_f32 v27, s4, v19, v27 ; D282001B 046E2604 s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E v_mul_f32_e32 v28, v22, v26 ; 10383516 v_mad_f32 v28, s4, v19, v28 ; D282001C 04722604 s_buffer_load_dword s32, s[0:3], 0x1d ; C210011D v_mul_f32_e32 v29, v23, v26 ; 103A3517 v_mad_f32 v29, s4, v19, v29 ; D282001D 04762604 v_mad_f32 v27, v26, v21, v27 ; D282001B 046E2B1A v_mad_f32 v28, v26, v22, v28 ; D282001C 04722D1A v_mad_f32 v26, v26, v23, v29 ; D282001A 04762F1A s_buffer_load_dword s33, s[0:3], 0x18 ; C2108118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v17, s7, v17 ; 0E222207 v_exp_f32_e32 v17, v17 ; 7E224B11 v_cndmask_b32_e64 v17, v17, 0, vcc ; D2000011 01A90111 s_buffer_load_dword s7, s[0:3], 0x19 ; C2038119 v_sub_f32_e64 v29, 1.0, s32 ; D208001D 000040F2 v_mul_f32_e32 v30, v21, v29 ; 103C3B15 v_mad_f32 v30, s32, v19, v30 ; D282001E 047A2620 s_buffer_load_dword s34, s[0:3], 0x1a ; C211011A s_buffer_load_dword s35, s[0:3], 0x1c ; C211811C v_mul_f32_e32 v31, s33, v17 ; 103E2221 v_mad_f32 v30, v30, v31, -v21 ; D282001E 84563F1E v_mul_f32_e32 v31, v22, v29 ; 103E3B16 v_mad_f32 v31, s32, v19, v31 ; D282001F 047E2620 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v32, s7, v17 ; 10402207 v_mad_f32 v31, v31, v32, -v22 ; D282001F 845A411F v_mul_f32_e32 v29, v23, v29 ; 103A3B17 v_mad_f32 v29, s32, v19, v29 ; D282001D 04762620 v_mul_f32_e32 v32, s34, v17 ; 10402222 v_mad_f32 v29, v29, v32, -v23 ; D282001D 845E411D image_sample v[32:33], 6, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[24:31], s[20:23] ; F0800600 00A62009 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v32, v17 ; 10222320 v_mad_f32 v30, v17, v30, v21 ; D282001E 04563D11 v_mad_f32 v31, v17, v31, v22 ; D282001F 045A3F11 v_mad_f32 v17, v17, v29, v23 ; D2820011 045E3B11 v_sub_f32_e32 v15, 1.0, v15 ; 081E1EF2 v_mul_f32_e64 v29, |v15|, |v15| ; D210031D 00021F0F v_mul_f32_e64 v29, v29, |v15| ; D210021D 00021F1D v_mul_f32_e32 v29, s35, v29 ; 103A3A23 v_mul_f32_e32 v32, v1, v1 ; 10400301 v_mad_f32 v32, v11, v11, v32 ; D2820020 0482170B v_mad_f32 v32, v12, v12, v32 ; D2820020 0482190C v_rsq_clamp_f32_e32 v32, v32 ; 7E405920 v_mul_f32_e32 v29, v29, v33 ; 103A431D v_mul_f32_e32 v21, v29, v21 ; 102A2B1D v_mul_f32_e32 v22, v29, v22 ; 102C2D1D v_mul_f32_e32 v23, v29, v23 ; 102E2F1D v_min_f32_e32 v29, 0x7f7fffff, v32 ; 1E3A40FF 7F7FFFFF v_mul_f32_e32 v32, v29, v1 ; 1040031D v_mul_f32_e32 v13, v13, v32 ; 101A410D v_mul_f32_e32 v34, v29, v11 ; 1044171D v_mad_f32 v13, v34, v14, v13 ; D282000D 04361D22 v_mul_f32_e32 v14, v29, v12 ; 101C191D v_mad_f32 v13, v14, v16, v13 ; D282000D 0436210E v_mul_f32_e32 v16, v32, v13 ; 10201B20 v_mad_f32 v16, v13, v32, v16 ; D2820010 0442410D v_mad_f32 v6, -v6, v0, v16 ; D2820006 24420106 v_mul_f32_e32 v16, v34, v13 ; 10201B22 v_mad_f32 v16, v13, v34, v16 ; D2820010 0442450D v_mad_f32 v7, -v7, v0, v16 ; D2820007 24420107 v_mul_f32_e32 v16, v14, v13 ; 10201B0E v_mad_f32 v13, v13, v14, v16 ; D282000D 04421D0D v_mad_f32 v0, -v8, v0, v13 ; D2820000 24360108 v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mad_f32 v1, v7, v11, v1 ; D2820001 04061707 v_mad_f32 v1, v0, v12, v1 ; D2820001 04061900 v_max_f32_e32 v1, 0, v1 ; 20020280 v_min_f32_e32 v1, 0x3f4ccccd, v1 ; 1E0202FF 3F4CCCCD v_mul_f32_e32 v1, s5, v1 ; 10020205 v_mul_f32_e32 v1, v33, v1 ; 10020321 v_mad_f32 v8, s4, v19, v27 ; D2820008 046E2604 v_mad_f32 v11, s4, v19, v28 ; D282000B 04722604 v_mad_f32 v12, s4, v19, v26 ; D282000C 046A2604 v_mov_b32_e32 v13, 0x358637bd ; 7E1A02FF 358637BD v_max_f32_e64 v8, |v8|, v13 ; D2200108 00021B08 v_max_f32_e64 v11, |v11|, v13 ; D220010B 00021B0B v_max_f32_e64 v12, |v12|, v13 ; D220010C 00021B0C v_mad_f32 v8, v8, v8, -v30 ; D2820008 847A1108 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 v_mad_f32 v8, v1, v8, v30 ; D2820008 047A1101 v_mad_f32 v11, v11, v11, -v31 ; D282000B 847E170B v_mad_f32 v11, v1, v11, v31 ; D282000B 047E1701 v_mad_f32 v12, v12, v12, -v17 ; D282000C 8446190C v_mad_f32 v1, v1, v12, v17 ; D2820001 04461901 s_buffer_load_dword s5, s[0:3], 0x24 ; C2028124 s_buffer_load_dword s7, s[0:3], 0x25 ; C2038125 s_buffer_load_dword s20, s[0:3], 0x26 ; C20A0126 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800700 00431A09 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v9, s4, v26 ; 10123404 v_mul_f32_e32 v10, s4, v27 ; 10143604 v_mul_f32_e32 v12, s4, v28 ; 10183804 v_add_f32_e64 v13, |v15|, v18 ; D206010D 0002250F v_cmp_gt_f32_e32 vcc, 0, v13 ; 7C081A80 v_cndmask_b32_e64 v13, v21, 0, vcc ; D200000D 01A90115 v_add_f32_e32 v8, v13, v8 ; 0610110D s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_buffer_load_dword s8, s[0:3], 0x22 ; C2040122 s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100 s_buffer_load_dword s10, s[0:3], 0x1 ; C2050101 s_buffer_load_dword s11, s[0:3], 0x2 ; C2058102 s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x15 ; C2088115 s_buffer_load_dword s18, s[0:3], 0x16 ; C2090116 s_buffer_load_dword s19, s[0:3], 0x28 ; C2098128 s_buffer_load_dword s21, s[0:3], 0x29 ; C20A8129 s_buffer_load_dword s0, s[0:3], 0x2a ; C200012A v_cndmask_b32_e64 v13, v22, 0, vcc ; D200000D 01A90116 v_cndmask_b32_e64 v15, v23, 0, vcc ; D200000F 01A90117 v_add_f32_e32 v11, v13, v11 ; 0616170D v_add_f32_e32 v1, v15, v1 ; 0602030F v_mul_f32_e32 v13, 0x3e99999a, v8 ; 101A10FF 3E99999A v_mad_f32 v13, v11, v24, v13 ; D282000D 0436310B v_mad_f32 v13, v1, v25, v13 ; D282000D 04363301 v_sub_f32_e64 v15, 1.0, s6 ; D208000F 00000CF2 v_mul_f32_e32 v8, v8, v15 ; 10101F08 v_mul_f32_e32 v11, v11, v15 ; 10161F0B v_mul_f32_e32 v1, v1, v15 ; 10021F01 v_mad_f32 v8, s6, v13, v8 ; D2820008 04221A06 v_mad_f32 v11, s6, v13, v11 ; D282000B 042E1A06 v_mad_f32 v1, s6, v13, v1 ; D2820001 04061A06 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s8, v8 ; 10101008 v_sub_f32_e64 v13, 1.0, s9 ; D208000D 000012F2 v_mul_f32_e32 v8, v13, v8 ; 1010110D v_mul_f32_e32 v11, s8, v11 ; 10161608 v_sub_f32_e64 v13, 1.0, s10 ; D208000D 000014F2 v_mul_f32_e32 v11, v13, v11 ; 1016170D v_mul_f32_e32 v1, s8, v1 ; 10020208 v_sub_f32_e64 v13, 1.0, s11 ; D208000D 000016F2 v_mul_f32_e32 v1, v13, v1 ; 1002030D v_mov_b32_e32 v13, s12 ; 7E1A020C v_mad_f32 v8, s15, v8, v13 ; D2820008 0436100F v_mov_b32_e32 v13, s13 ; 7E1A020D v_mad_f32 v11, s15, v11, v13 ; D282000B 0436160F v_mov_b32_e32 v13, s14 ; 7E1A020E v_mad_f32 v1, s15, v1, v13 ; D2820001 0436020F v_mul_f32_e32 v9, s8, v9 ; 10121208 v_mul_f32_e32 v10, s8, v10 ; 10141408 v_mul_f32_e32 v12, s8, v12 ; 10181808 v_mov_b32_e32 v13, s16 ; 7E1A0210 v_mul_f32_e32 v15, v2, v2 ; 101E0502 v_mad_f32 v15, v3, v3, v15 ; D282000F 043E0703 v_mad_f32 v15, v4, v4, v15 ; D282000F 043E0904 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mad_f32 v9, s4, v9, v13 ; D2820009 04361204 v_mov_b32_e32 v13, s17 ; 7E1A0211 v_mad_f32 v10, s4, v10, v13 ; D282000A 04361404 v_mov_b32_e32 v13, s18 ; 7E1A0212 v_mad_f32 v12, s4, v12, v13 ; D282000C 04361804 v_min_f32_e32 v13, 0x7f7fffff, v15 ; 1E1A1EFF 7F7FFFFF v_mul_f32_e32 v2, v13, v2 ; 1004050D v_mul_f32_e32 v3, v13, v3 ; 1006070D v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v7, v3, v6 ; D2820006 041A0707 v_mul_f32_e32 v2, v2, v32 ; 10044102 v_mad_f32 v2, v34, v3, v2 ; D2820002 040A0722 v_mul_f32_e32 v3, v13, v4 ; 1006090D v_mad_f32 v0, v0, v3, v6 ; D2820000 041A0700 v_mad_f32 v2, v14, v3, v2 ; D2820002 040A070E v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_and_b32_e32 v3, v0, v20 ; 36062900 v_log_f32_e32 v3, v3 ; 7E064F03 v_add_f32_e32 v4, v18, v2 ; 06080512 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_add_f32_e32 v0, v18, v0 ; 06000112 v_mov_b32_e32 v4, s9 ; 7E080209 v_mul_legacy_f32_e32 v3, 0x41700000, v3 ; 0E0606FF 41700000 v_exp_f32_e32 v3, v3 ; 7E064B03 v_mul_f32_e32 v3, 0x3f6a46ad, v3 ; 100606FF 3F6A46AD v_mul_f32_e32 v6, v3, v9 ; 100C1303 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v6, 0, vcc ; D2000000 01A90106 v_mad_f32 v0, v8, v2, v0 ; D2820000 04020508 v_mad_f32 v0, s5, v0, v4 ; D2820000 04120005 v_mov_b32_e32 v4, s10 ; 7E08020A v_mul_f32_e32 v6, v3, v10 ; 100C1503 v_mul_f32_e32 v3, v3, v12 ; 10061903 v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_mad_f32 v6, v11, v2, v6 ; D2820006 041A050B v_mad_f32 v4, s7, v6, v4 ; D2820004 04120C07 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_mad_f32 v2, v1, v2, v3 ; D2820002 040E0501 v_mov_b32_e32 v3, s11 ; 7E06020B v_mad_f32 v2, s20, v2, v3 ; D2820002 040E0414 v_mad_f32 v0, v8, s19, v0 ; D2820000 04002708 v_mad_f32 v3, v11, s21, v4 ; D2820003 04102B0B v_mad_f32 v1, v1, s0, v2 ; D2820001 04080101 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 v_mul_f32_e32 v2, 0x3b800000, v5 ; 10040AFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 36 Code Size: 1524 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[4], PERSPECTIVE DCL IN[1], TEXCOORD[5], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL IN[3], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..10] DCL TEMP[0] DCL TEMP[1], LOCAL DCL TEMP[2..5] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.3000, 0.5900, 0.1100, -0.0000} IMM[2] FLT32 { 20.0000, -0.8000, 0.2000, 8.0000} IMM[3] FLT32 { 0.0398, 0.0039, 0.0000, 340282346638528859811704183484516925440.0000} IMM[4] FLT32 {-340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[3], SAMP[2], 2D 1: MAD TEMP[0].xyz, TEMP[0], IMM[0].xxxx, IMM[0].yyyy 2: DP3 TEMP[1].x, IN[2], IN[2] 3: RSQ TEMP[1].x, TEMP[1].xxxx 4: MIN TEMP[1].x, IMM[3].wwww, TEMP[1].xxxx 5: MUL TEMP[2].xyz, IN[2], TEMP[1].xxxx 6: DP3 TEMP[0].x, TEMP[0], TEMP[2] 7: MAX TEMP[2].w, TEMP[0].xxxx, IMM[0].wwww 8: MOV_SAT TEMP[0].x, TEMP[0].xxxx 9: ADD TEMP[0].y, TEMP[2].wwww, IMM[2].yyyy 10: ADD TEMP[0].z, -TEMP[2].wwww, IMM[0].zzzz 11: CMP TEMP[0].y, TEMP[0].yyyy, TEMP[0].zzzz, IMM[2].zzzz 12: MUL TEMP[0].y, TEMP[0].yyyy, TEMP[0].yyyy 13: MUL TEMP[0].y, TEMP[0].yyyy, TEMP[0].yyyy 14: TEX TEMP[3], IN[3], SAMP[0], 2D 15: MAD TEMP[3].xyz, TEMP[3], IMM[0].xxxx, IMM[0].yyyy 16: DP3 TEMP[1].x, TEMP[3], TEMP[3] 17: RSQ TEMP[1].x, TEMP[1].xxxx 18: MIN TEMP[1].x, IMM[3].wwww, TEMP[1].xxxx 19: MUL TEMP[4].xyz, TEMP[3], TEMP[1].xxxx 20: DP3 TEMP[0].z, TEMP[4], TEMP[2] 21: MUL TEMP[3].xyz, TEMP[0].zzzz, TEMP[4] 22: MAD TEMP[2].xyz, TEMP[3], IMM[0].xxxx, -TEMP[2] 23: DP3 TEMP[1].x, IN[0], IN[0] 24: RSQ TEMP[1].x, TEMP[1].xxxx 25: MIN TEMP[1].x, IMM[3].wwww, TEMP[1].xxxx 26: MUL TEMP[3].xyz, IN[0], TEMP[1].xxxx 27: DP3_SAT TEMP[0].z, TEMP[2], TEMP[3] 28: DP3_SAT TEMP[0].w, TEMP[4], TEMP[3] 29: LG2 TEMP[1].x, |TEMP[0].zzzz| 30: MAX TEMP[2].x, IMM[4].xxxx, TEMP[1].xxxx 31: ADD TEMP[0].z, TEMP[0].zzzz, IMM[1].wwww 32: MUL TEMP[2].y, TEMP[2].xxxx, IMM[2].xxxx 33: MUL TEMP[2].x, TEMP[2].xxxx, CONST[8].yyyy 34: EX2 TEMP[2].x, TEMP[2].xxxx 35: EX2 TEMP[2].y, TEMP[2].yyyy 36: CMP TEMP[2].y, TEMP[0].zzzz, IMM[0].wwww, TEMP[2].yyyy 37: MUL TEMP[0].y, TEMP[0].yyyy, TEMP[2].yyyy 38: TEX TEMP[3], IN[3], SAMP[3], 2D 39: MUL TEMP[2].yzw, TEMP[0].yyyy, TEMP[3].xxyz 40: MUL TEMP[3].xyz, TEMP[3], CONST[8].xxxx 41: MAD TEMP[2].yzw, CONST[7].wwww, TEMP[2], TEMP[3].xxyz 42: MUL TEMP[2].yzw, TEMP[2], CONST[7].zzzz 43: MAD TEMP[2].yzw, TEMP[2], CONST[5].wwww, CONST[5].xxyz 44: MOV TEMP[0].y, CONST[8].yyyy 45: ADD TEMP[0].y, TEMP[0].yyyy, IMM[2].wwww 46: MUL TEMP[0].y, TEMP[0].yyyy, TEMP[2].xxxx 47: MUL TEMP[0].y, TEMP[0].yyyy, IMM[3].xxxx 48: CMP TEMP[0].y, TEMP[0].zzzz, IMM[0].wwww, TEMP[0].yyyy 49: MUL TEMP[2].xyz, TEMP[2].yzww, TEMP[0].yyyy 50: ADD TEMP[0].y, TEMP[0].xxxx, IMM[1].wwww 51: POW TEMP[2].w, |TEMP[0].xxxx|, CONST[7].yyyy 52: CMP TEMP[0].x, TEMP[0].yyyy, IMM[0].wwww, TEMP[2].wwww 53: MUL TEMP[3].xyz, TEMP[0].xxxx, CONST[6] 54: TEX TEMP[4], IN[3], SAMP[1], 2D 55: DP3 TEMP[0].y, TEMP[4], IMM[1] 56: LRP TEMP[5].xyz, CONST[7].xxxx, TEMP[0].yyyy, TEMP[4] 57: MAD TEMP[3].xyz, TEMP[5], TEMP[3], -TEMP[4] 58: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[3], TEMP[4] 59: MUL TEMP[0].xyz, TEMP[0], CONST[7].zzzz 60: MOV TEMP[3].z, IMM[0].zzzz 61: ADD TEMP[3].xyz, TEMP[3].zzzz, -CONST[0] 62: MUL TEMP[0].xyz, TEMP[0], TEMP[3] 63: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 64: ADD TEMP[2].w, TEMP[0].wwww, IMM[1].wwww 65: CMP TEMP[0].w, TEMP[2].wwww, IMM[0].wwww, TEMP[0].wwww 66: MAD TEMP[2].xyz, TEMP[0], TEMP[0].wwww, TEMP[2] 67: MOV TEMP[3].xyz, CONST[0] 68: MAD TEMP[2].xyz, TEMP[2], CONST[9], TEMP[3] 69: MAD OUT[0].xyz, TEMP[0], CONST[10], TEMP[2] 70: MUL OUT[0].w, IMM[3].yyyy, IN[1].wwww 71: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %50 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %55 = bitcast <8 x i32> addrspace(2)* %54 to <32 x i8> addrspace(2)* %56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %58 = bitcast <4 x i32> addrspace(2)* %57 to <16 x i8> addrspace(2)* %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %61 = bitcast <8 x i32> addrspace(2)* %60 to <32 x i8> addrspace(2)* %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %64 = bitcast <4 x i32> addrspace(2)* %63 to <16 x i8> addrspace(2)* %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 %66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %67 = bitcast <8 x i32> addrspace(2)* %66 to <32 x i8> addrspace(2)* %68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %70 = bitcast <4 x i32> addrspace(2)* %69 to <16 x i8> addrspace(2)* %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %81 = bitcast float %79 to i32 %82 = bitcast float %80 to i32 %83 = insertelement <2 x i32> undef, i32 %81, i32 0 %84 = insertelement <2 x i32> %83, i32 %82, i32 1 %85 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %84, <32 x i8> %62, <16 x i8> %65, i32 2) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = fmul float %86, 2.000000e+00 %90 = fadd float %89, -1.000000e+00 %91 = fmul float %87, 2.000000e+00 %92 = fadd float %91, -1.000000e+00 %93 = fmul float %88, 2.000000e+00 %94 = fadd float %93, -1.000000e+00 %95 = fmul float %76, %76 %96 = fmul float %77, %77 %97 = fadd float %96, %95 %98 = fmul float %78, %78 %99 = fadd float %97, %98 %100 = call float @llvm.AMDGPU.rsq.clamped.f32(float %99) %101 = call float @llvm.minnum.f32(float %100, float 0x47EFFFFFE0000000) %102 = fmul float %76, %101 %103 = fmul float %77, %101 %104 = fmul float %78, %101 %105 = fmul float %90, %102 %106 = fmul float %92, %103 %107 = fadd float %106, %105 %108 = fmul float %94, %104 %109 = fadd float %107, %108 %110 = call float @llvm.maxnum.f32(float %109, float 0.000000e+00) %111 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00) %112 = fadd float %110, 0xBFE99999A0000000 %113 = fsub float 1.000000e+00, %110 %114 = call float @llvm.AMDGPU.cndlt(float %112, float %113, float 0x3FC9999980000000) %115 = fmul float %114, %114 %116 = fmul float %115, %115 %117 = bitcast float %79 to i32 %118 = bitcast float %80 to i32 %119 = insertelement <2 x i32> undef, i32 %117, i32 0 %120 = insertelement <2 x i32> %119, i32 %118, i32 1 %121 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %120, <32 x i8> %51, <16 x i8> %53, i32 2) %122 = extractelement <4 x float> %121, i32 0 %123 = extractelement <4 x float> %121, i32 1 %124 = extractelement <4 x float> %121, i32 2 %125 = fmul float %122, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %123, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %124, 2.000000e+00 %130 = fadd float %129, -1.000000e+00 %131 = fmul float %126, %126 %132 = fmul float %128, %128 %133 = fadd float %132, %131 %134 = fmul float %130, %130 %135 = fadd float %133, %134 %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135) %137 = call float @llvm.minnum.f32(float %136, float 0x47EFFFFFE0000000) %138 = fmul float %126, %137 %139 = fmul float %128, %137 %140 = fmul float %130, %137 %141 = fmul float %138, %102 %142 = fmul float %139, %103 %143 = fadd float %142, %141 %144 = fmul float %140, %104 %145 = fadd float %143, %144 %146 = fmul float %145, %138 %147 = fmul float %145, %139 %148 = fmul float %145, %140 %149 = fmul float %146, 2.000000e+00 %150 = fsub float %149, %102 %151 = fmul float %147, 2.000000e+00 %152 = fsub float %151, %103 %153 = fmul float %148, 2.000000e+00 %154 = fsub float %153, %104 %155 = fmul float %72, %72 %156 = fmul float %73, %73 %157 = fadd float %156, %155 %158 = fmul float %74, %74 %159 = fadd float %157, %158 %160 = call float @llvm.AMDGPU.rsq.clamped.f32(float %159) %161 = call float @llvm.minnum.f32(float %160, float 0x47EFFFFFE0000000) %162 = fmul float %72, %161 %163 = fmul float %73, %161 %164 = fmul float %74, %161 %165 = fmul float %150, %162 %166 = fmul float %152, %163 %167 = fadd float %166, %165 %168 = fmul float %154, %164 %169 = fadd float %167, %168 %170 = call float @llvm.AMDIL.clamp.(float %169, float 0.000000e+00, float 1.000000e+00) %171 = fmul float %138, %162 %172 = fmul float %139, %163 %173 = fadd float %172, %171 %174 = fmul float %140, %164 %175 = fadd float %173, %174 %176 = call float @llvm.AMDIL.clamp.(float %175, float 0.000000e+00, float 1.000000e+00) %177 = call float @fabs(float %170) %178 = call float @llvm.log2.f32(float %177) %179 = call float @llvm.maxnum.f32(float %178, float 0xC7EFFFFFE0000000) %180 = fadd float %170, 0xBEB0C6F7A0000000 %181 = fmul float %179, 2.000000e+01 %182 = fmul float %179, %43 %183 = call float @llvm.AMDIL.exp.(float %182) %184 = call float @llvm.AMDIL.exp.(float %181) %185 = call float @llvm.AMDGPU.cndlt(float %180, float 0.000000e+00, float %184) %186 = fmul float %116, %185 %187 = bitcast float %79 to i32 %188 = bitcast float %80 to i32 %189 = insertelement <2 x i32> undef, i32 %187, i32 0 %190 = insertelement <2 x i32> %189, i32 %188, i32 1 %191 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %190, <32 x i8> %68, <16 x i8> %71, i32 2) %192 = extractelement <4 x float> %191, i32 0 %193 = extractelement <4 x float> %191, i32 1 %194 = extractelement <4 x float> %191, i32 2 %195 = fmul float %186, %192 %196 = fmul float %186, %193 %197 = fmul float %186, %194 %198 = fmul float %192, %42 %199 = fmul float %193, %42 %200 = fmul float %194, %42 %201 = fmul float %41, %195 %202 = fadd float %201, %198 %203 = fmul float %41, %196 %204 = fadd float %203, %199 %205 = fmul float %41, %197 %206 = fadd float %205, %200 %207 = fmul float %202, %40 %208 = fmul float %204, %40 %209 = fmul float %206, %40 %210 = fmul float %207, %34 %211 = fadd float %210, %31 %212 = fmul float %208, %34 %213 = fadd float %212, %32 %214 = fmul float %209, %34 %215 = fadd float %214, %33 %216 = fadd float %43, 8.000000e+00 %217 = fmul float %216, %183 %218 = fmul float %217, 0x3FA45F3060000000 %219 = call float @llvm.AMDGPU.cndlt(float %180, float 0.000000e+00, float %218) %220 = fmul float %211, %219 %221 = fmul float %213, %219 %222 = fmul float %215, %219 %223 = fadd float %111, 0xBEB0C6F7A0000000 %224 = call float @fabs(float %111) %225 = call float @llvm.pow.f32(float %224, float %39) %226 = call float @llvm.AMDGPU.cndlt(float %223, float 0.000000e+00, float %225) %227 = fmul float %226, %35 %228 = fmul float %226, %36 %229 = fmul float %226, %37 %230 = bitcast float %79 to i32 %231 = bitcast float %80 to i32 %232 = insertelement <2 x i32> undef, i32 %230, i32 0 %233 = insertelement <2 x i32> %232, i32 %231, i32 1 %234 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %233, <32 x i8> %56, <16 x i8> %59, i32 2) %235 = extractelement <4 x float> %234, i32 0 %236 = extractelement <4 x float> %234, i32 1 %237 = extractelement <4 x float> %234, i32 2 %238 = fmul float %235, 0x3FD3333340000000 %239 = fmul float %236, 0x3FE2E147A0000000 %240 = fadd float %239, %238 %241 = fmul float %237, 0x3FBC28F5C0000000 %242 = fadd float %240, %241 %243 = call float @llvm.AMDGPU.lrp(float %38, float %242, float %235) %244 = call float @llvm.AMDGPU.lrp(float %38, float %242, float %236) %245 = call float @llvm.AMDGPU.lrp(float %38, float %242, float %237) %246 = fmul float %243, %227 %247 = fsub float %246, %235 %248 = fmul float %244, %228 %249 = fsub float %248, %236 %250 = fmul float %245, %229 %251 = fsub float %250, %237 %252 = fmul float %226, %247 %253 = fadd float %252, %235 %254 = fmul float %226, %249 %255 = fadd float %254, %236 %256 = fmul float %226, %251 %257 = fadd float %256, %237 %258 = fmul float %253, %40 %259 = fmul float %255, %40 %260 = fmul float %257, %40 %261 = fsub float 1.000000e+00, %24 %262 = fsub float 1.000000e+00, %25 %263 = fsub float 1.000000e+00, %26 %264 = fmul float %258, %261 %265 = fmul float %259, %262 %266 = fmul float %260, %263 %267 = fmul float %264, %30 %268 = fadd float %267, %27 %269 = fmul float %265, %30 %270 = fadd float %269, %28 %271 = fmul float %266, %30 %272 = fadd float %271, %29 %273 = fadd float %176, 0xBEB0C6F7A0000000 %274 = call float @llvm.AMDGPU.cndlt(float %273, float 0.000000e+00, float %176) %275 = fmul float %268, %274 %276 = fadd float %275, %220 %277 = fmul float %270, %274 %278 = fadd float %277, %221 %279 = fmul float %272, %274 %280 = fadd float %279, %222 %281 = fmul float %276, %44 %282 = fadd float %281, %24 %283 = fmul float %278, %45 %284 = fadd float %283, %25 %285 = fmul float %280, %46 %286 = fadd float %285, %26 %287 = fmul float %268, %47 %288 = fadd float %287, %282 %289 = fmul float %270, %48 %290 = fadd float %289, %284 %291 = fmul float %272, %49 %292 = fadd float %291, %286 %293 = fmul float %75, 3.906250e-03 %294 = call i32 @llvm.SI.packf16(float %288, float %290) %295 = bitcast i32 %294 to float %296 = call i32 @llvm.SI.packf16(float %292, float %293) %297 = bitcast i32 %296 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %295, float %297, float %295, float %297) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 s_load_dwordx4 s[40:43], s[4:5], 0x0 ; C0940500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[8:11], 0x1d ; C206091D s_buffer_load_dword s1, s[8:11], 0x1e ; C200891E s_buffer_load_dword s2, s[8:11], 0x1f ; C201091F s_buffer_load_dword s3, s[8:11], 0x20 ; C2018920 s_buffer_load_dword s0, s[8:11], 0x21 ; C2000921 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx8 s[48:55], s[6:7], 0x0 ; C0D80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[56:63], s[6:7], 0x10 ; C0DC0710 s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[56:63], s[44:47] ; F0800700 016E0B09 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[48:55], s[40:43] ; F0800700 014C0E09 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v0, 2.0, v11, -1.0 ; D2820000 03CE16F4 v_mad_f32 v1, 2.0, v12, -1.0 ; D2820001 03CE18F4 v_mad_f32 v11, 2.0, v13, -1.0 ; D282000B 03CE1AF4 v_mul_f32_e32 v12, v6, v6 ; 10180D06 v_mad_f32 v12, v7, v7, v12 ; D282000C 04320F07 v_mad_f32 v12, v8, v8, v12 ; D282000C 04321108 v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v13, 2.0, v14, -1.0 ; D282000D 03CE1CF4 v_mad_f32 v14, 2.0, v15, -1.0 ; D282000E 03CE1EF4 v_mad_f32 v15, 2.0, v16, -1.0 ; D282000F 03CE20F4 v_min_f32_e32 v12, 0x7f7fffff, v12 ; 1E1818FF 7F7FFFFF v_mul_f32_e32 v16, v12, v6 ; 10200D0C v_mul_f32_e32 v0, v16, v0 ; 10000110 v_mul_f32_e32 v17, v12, v7 ; 10220F0C v_mad_f32 v0, v1, v17, v0 ; D2820000 04022301 v_mul_f32_e32 v1, v12, v8 ; 1002110C v_mad_f32 v0, v11, v1, v0 ; D2820000 0402030B s_buffer_load_dword s4, s[8:11], 0x1c ; C202091C v_add_f32_e64 v11, 0, v0 clamp ; D206080B 00020080 v_mov_b32_e32 v18, 0xb58637bd ; 7E2402FF B58637BD v_add_f32_e32 v19, v18, v11 ; 06261712 v_cmp_gt_f32_e32 vcc, 0, v19 ; 7C082680 image_sample v[19:21], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[32:39], s[20:23] ; F0800700 00A81309 v_and_b32_e32 v11, 0x7fffffff, v11 ; 361616FF 7FFFFFFF v_log_f32_e32 v11, v11 ; 7E164F0B image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[24:31], s[16:19] ; F0800700 00861609 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v9, 0x3e99999a, v22 ; 10122CFF 3E99999A v_madmk_f32_e32 v9, v23, v9, 0x3f170a3d ; 40121317 3F170A3D v_madmk_f32_e32 v9, v24, v9, 0x3de147ae ; 40121318 3DE147AE v_sub_f32_e64 v10, 1.0, s4 ; D208000A 000008F2 v_mul_f32_e32 v25, v22, v10 ; 10321516 v_mad_f32 v25, s4, v9, v25 ; D2820019 04661204 s_buffer_load_dword s5, s[8:11], 0x18 ; C2028918 s_buffer_load_dword s6, s[8:11], 0x19 ; C2030919 v_mul_f32_e32 v26, v23, v10 ; 10341517 v_mad_f32 v26, s4, v9, v26 ; D282001A 046A1204 s_buffer_load_dword s7, s[8:11], 0x1a ; C203891A v_mul_f32_e32 v10, v24, v10 ; 10141518 v_mad_f32 v9, s4, v9, v10 ; D2820009 042A1204 v_mul_legacy_f32_e32 v10, s12, v11 ; 0E14160C v_exp_f32_e32 v10, v10 ; 7E144B0A v_cndmask_b32_e64 v10, v10, 0, vcc ; D200000A 01A9010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s5, v10 ; 10161405 v_mad_f32 v11, v25, v11, -v22 ; D282000B 845A1719 v_mul_f32_e32 v25, s6, v10 ; 10321406 v_mad_f32 v25, v26, v25, -v23 ; D2820019 845E331A v_mul_f32_e32 v26, s7, v10 ; 10341407 v_mad_f32 v9, v9, v26, -v24 ; D2820009 84623509 v_mul_f32_e32 v26, v13, v13 ; 10341B0D v_mad_f32 v26, v14, v14, v26 ; D282001A 046A1D0E v_mad_f32 v26, v15, v15, v26 ; D282001A 046A1F0F v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mad_f32 v11, v10, v11, v22 ; D282000B 045A170A v_mad_f32 v22, v10, v25, v23 ; D2820016 045E330A v_mad_f32 v9, v10, v9, v24 ; D2820009 0462130A v_min_f32_e32 v10, 0x7f7fffff, v26 ; 1E1434FF 7F7FFFFF v_mul_f32_e32 v13, v10, v13 ; 101A1B0A v_mul_f32_e32 v14, v10, v14 ; 101C1D0A v_mul_f32_e32 v10, v10, v15 ; 10141F0A s_buffer_load_dword s4, s[8:11], 0x17 ; C2020917 s_buffer_load_dword s5, s[8:11], 0x0 ; C2028900 s_buffer_load_dword s6, s[8:11], 0x1 ; C2030901 s_buffer_load_dword s7, s[8:11], 0x2 ; C2038902 s_buffer_load_dword s12, s[8:11], 0x10 ; C2060910 s_buffer_load_dword s13, s[8:11], 0x11 ; C2068911 s_buffer_load_dword s14, s[8:11], 0x12 ; C2070912 s_buffer_load_dword s15, s[8:11], 0x13 ; C2078913 s_buffer_load_dword s16, s[8:11], 0x14 ; C2080914 s_buffer_load_dword s17, s[8:11], 0x15 ; C2088915 s_buffer_load_dword s18, s[8:11], 0x16 ; C2090916 s_buffer_load_dword s19, s[8:11], 0x24 ; C2098924 s_buffer_load_dword s20, s[8:11], 0x25 ; C20A0925 s_buffer_load_dword s21, s[8:11], 0x26 ; C20A8926 s_buffer_load_dword s22, s[8:11], 0x28 ; C20B0928 s_buffer_load_dword s23, s[8:11], 0x29 ; C20B8929 s_buffer_load_dword s8, s[8:11], 0x2a ; C204092A v_mul_f32_e32 v15, v16, v13 ; 101E1B10 v_mad_f32 v15, v14, v17, v15 ; D282000F 043E230E v_mad_f32 v1, v10, v1, v15 ; D2820001 043E030A v_mul_f32_e32 v15, v13, v1 ; 101E030D v_mad_f32 v15, v1, v13, v15 ; D282000F 043E1B01 v_mad_f32 v6, -v6, v12, v15 ; D2820006 243E1906 v_mul_f32_e32 v15, v14, v1 ; 101E030E v_mad_f32 v15, v1, v14, v15 ; D282000F 043E1D01 v_mad_f32 v7, -v7, v12, v15 ; D2820007 243E1907 v_mul_f32_e32 v15, v2, v2 ; 101E0502 v_mad_f32 v15, v3, v3, v15 ; D282000F 043E0703 v_mad_f32 v15, v4, v4, v15 ; D282000F 043E0904 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v16, v10, v1 ; 1020030A v_mad_f32 v1, v1, v10, v16 ; D2820001 04421501 v_mad_f32 v1, -v8, v12, v1 ; D2820001 24061908 v_min_f32_e32 v8, 0x7f7fffff, v15 ; 1E101EFF 7F7FFFFF v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v8, v3 ; 10060708 v_mul_f32_e32 v4, v8, v4 ; 10080908 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v7, v3, v6 ; D2820006 041A0707 v_mad_f32 v1, v1, v4, v6 ; D2820001 041A0901 v_max_f32_e32 v0, 0, v0 ; 20000080 v_mov_b32_e32 v6, 0xbf4ccccd ; 7E0C02FF BF4CCCCD v_add_f32_e32 v6, v0, v6 ; 060C0D00 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_log_f32_e64 v6, |v1| ; D34E0106 00000101 v_mov_b32_e32 v7, 0x3e4ccccc ; 7E0E02FF 3E4CCCCC v_cndmask_b32_e64 v0, v7, v0, vcc ; D2000000 01AA0107 v_mov_b32_e32 v7, 0xff7fffff ; 7E0E02FF FF7FFFFF v_max_f32_e32 v6, v6, v7 ; 200C0F06 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_add_f32_e32 v1, v18, v1 ; 06020312 v_mul_f32_e32 v7, 0x41a00000, v6 ; 100E0CFF 41A00000 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v7, 0, vcc ; D2000001 01A90107 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v1, s3, v19 ; 10022603 v_mul_f32_e32 v7, s3, v20 ; 100E2803 v_mul_f32_e32 v8, s3, v21 ; 10102A03 v_mul_f32_e32 v12, v19, v0 ; 10180113 v_mul_f32_e32 v15, v20, v0 ; 101E0114 v_mul_f32_e32 v0, v21, v0 ; 10000115 v_mad_f32 v1, s2, v12, v1 ; D2820001 04061802 v_mad_f32 v7, s2, v15, v7 ; D2820007 041E1E02 v_mad_f32 v0, s2, v0, v8 ; D2820000 04220002 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s16 ; 7E100210 v_mul_f32_e32 v1, s1, v1 ; 10020201 v_mad_f32 v1, s4, v1, v8 ; D2820001 04220204 v_mov_b32_e32 v8, s17 ; 7E100211 v_mul_f32_e32 v7, s1, v7 ; 100E0E01 v_mad_f32 v7, s4, v7, v8 ; D2820007 04220E04 v_mov_b32_e32 v8, s18 ; 7E100212 v_mul_f32_e32 v0, s1, v0 ; 10000001 v_mad_f32 v0, s4, v0, v8 ; D2820000 04220004 v_mul_f32_e32 v8, s1, v11 ; 10101601 v_mul_f32_e32 v11, s1, v22 ; 10162C01 v_mul_f32_e32 v9, s1, v9 ; 10121201 v_sub_f32_e64 v12, 1.0, s5 ; D208000C 00000AF2 v_mul_f32_e32 v8, v12, v8 ; 1010110C v_sub_f32_e64 v12, 1.0, s6 ; D208000C 00000CF2 v_mul_f32_e32 v11, v12, v11 ; 1016170C v_sub_f32_e64 v12, 1.0, s7 ; D208000C 00000EF2 v_mul_f32_e32 v9, v12, v9 ; 1012130C v_mov_b32_e32 v12, s12 ; 7E18020C v_mad_f32 v8, s15, v8, v12 ; D2820008 0432100F v_mov_b32_e32 v12, s13 ; 7E18020D v_mad_f32 v11, s15, v11, v12 ; D282000B 0432160F v_mov_b32_e32 v12, s14 ; 7E18020E v_mad_f32 v9, s15, v9, v12 ; D2820009 0432120F v_mul_f32_e32 v2, v2, v13 ; 10041B02 v_mad_f32 v2, v14, v3, v2 ; D2820002 040A070E v_mad_f32 v2, v10, v4, v2 ; D2820002 040A090A v_mul_f32_e32 v3, s0, v6 ; 10060C00 v_mov_b32_e32 v4, 0x41000000 ; 7E0802FF 41000000 v_add_f32_e32 v4, s0, v4 ; 06080800 v_exp_f32_e32 v3, v3 ; 7E064B03 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_mul_f32_e32 v3, 0x3d22f983, v3 ; 100606FF 3D22F983 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_add_f32_e32 v4, v18, v2 ; 06080512 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_mov_b32_e32 v4, s5 ; 7E080205 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mad_f32 v1, v8, v2, v1 ; D2820001 04060508 v_mad_f32 v1, s19, v1, v4 ; D2820001 04120213 v_mul_f32_e32 v4, v3, v7 ; 10080F03 v_mad_f32 v4, v11, v2, v4 ; D2820004 0412050B v_mov_b32_e32 v6, s6 ; 7E0C0206 v_mad_f32 v4, s20, v4, v6 ; D2820004 041A0814 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mad_f32 v0, v9, v2, v0 ; D2820000 04020509 v_mov_b32_e32 v2, s7 ; 7E040207 v_mad_f32 v0, s21, v0, v2 ; D2820000 040A0015 v_mad_f32 v1, v8, s22, v1 ; D2820001 04042D08 v_mad_f32 v2, v11, s23, v4 ; D2820002 04102F0B v_mad_f32 v0, v9, s8, v0 ; D2820000 04001109 v_mul_f32_e32 v3, 0x3b800000, v5 ; 10060AFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 28 Code Size: 1256 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[4], PERSPECTIVE DCL IN[1], TEXCOORD[5], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL IN[3], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..9] DCL TEMP[0..2] DCL TEMP[3], LOCAL DCL TEMP[4..5] IMM[0] FLT32 { -0.0000, 8.0000, 0.0398, 0.0039} IMM[1] FLT32 { 2.0000, -1.0000, -2.0000, -0.0000} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[3], SAMP[0], 2D 1: MAD TEMP[1].xyz, TEMP[0], IMM[1].xxxx, IMM[1].yyzw 2: MAD TEMP[0].xyz, TEMP[0], IMM[1].xxxx, IMM[1].yyyy 3: TEX TEMP[2], IN[3], SAMP[1], 2D 4: MAD TEMP[1].xyz, TEMP[2].xxxx, TEMP[1], -IMM[1].wwyw 5: DP3 TEMP[3].x, TEMP[1], TEMP[1] 6: RSQ TEMP[3].x, TEMP[3].xxxx 7: MIN TEMP[3].x, IMM[2].xxxx, TEMP[3].xxxx 8: MUL TEMP[4].xyz, TEMP[1], TEMP[3].xxxx 9: DP3 TEMP[3].x, IN[2], IN[2] 10: RSQ TEMP[3].x, TEMP[3].xxxx 11: MIN TEMP[3].x, IMM[2].xxxx, TEMP[3].xxxx 12: MUL TEMP[1].xyz, IN[2], TEMP[3].xxxx 13: DP3 TEMP[0].w, TEMP[4], TEMP[1] 14: MUL TEMP[5].xyz, TEMP[0].wwww, TEMP[4] 15: MAD TEMP[5].xyz, TEMP[5], IMM[1].xxxx, -TEMP[1] 16: DP3_SAT TEMP[0].x, TEMP[1], TEMP[0] 17: TEX TEMP[1], TEMP[5], SAMP[3], CUBE 18: MUL TEMP[0].yzw, TEMP[1].xxyz, TEMP[2].xxxx 19: MUL TEMP[0].yzw, TEMP[0], CONST[6].xxxx 20: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[0].xxxx 21: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].xxxx 22: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 23: MUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx 24: MUL TEMP[0].yzw, TEMP[0], TEMP[2].yyyy 25: MUL TEMP[1].x, TEMP[2].yyyy, CONST[6].wwww 26: MAD TEMP[1].xyz, TEMP[1].xxxx, CONST[5].wwww, CONST[5] 27: CMP TEMP[0].xyz, TEMP[0].xxxx, -IMM[1].wwww, TEMP[0].yzww 28: TEX TEMP[2], IN[3], SAMP[2], 2D 29: MAD TEMP[0].xyz, CONST[6].yyyy, TEMP[2], TEMP[0] 30: MUL TEMP[0].xyz, TEMP[0], CONST[6].zzzz 31: MOV TEMP[2].y, IMM[1].yyyy 32: ADD TEMP[2].xyz, -TEMP[2].yyyy, -CONST[0] 33: MUL TEMP[0].xyz, TEMP[0], TEMP[2] 34: MAD TEMP[0].xyz, TEMP[0], CONST[4].wwww, CONST[4] 35: DP3 TEMP[3].x, IN[0], IN[0] 36: RSQ TEMP[3].x, TEMP[3].xxxx 37: MIN TEMP[3].x, IMM[2].xxxx, TEMP[3].xxxx 38: MUL TEMP[2].xyz, IN[0], TEMP[3].xxxx 39: DP3_SAT TEMP[0].w, TEMP[4], TEMP[2] 40: DP3_SAT TEMP[1].w, TEMP[5], TEMP[2] 41: MUL TEMP[2].xyz, TEMP[0].wwww, TEMP[0] 42: ADD TEMP[0].w, TEMP[0].wwww, IMM[0].xxxx 43: CMP TEMP[2].xyz, TEMP[0].wwww, -IMM[1].wwww, TEMP[2] 44: POW TEMP[0].w, |TEMP[1].wwww|, CONST[7].xxxx 45: ADD TEMP[1].w, TEMP[1].wwww, IMM[0].xxxx 46: MOV TEMP[4].y, IMM[0].yyyy 47: ADD TEMP[2].w, TEMP[4].yyyy, CONST[7].xxxx 48: MUL TEMP[0].w, TEMP[0].wwww, TEMP[2].wwww 49: MUL TEMP[0].w, TEMP[0].wwww, IMM[0].zzzz 50: MUL TEMP[1].xyz, TEMP[1], TEMP[0].wwww 51: CMP TEMP[1].xyz, TEMP[1].wwww, -IMM[1].wwww, TEMP[1] 52: ADD TEMP[1].xyz, TEMP[1], TEMP[2] 53: MOV TEMP[2].xyz, CONST[0] 54: MAD TEMP[1].xyz, TEMP[1], CONST[8], TEMP[2] 55: MAD OUT[0].xyz, TEMP[0], CONST[9], TEMP[1] 56: MUL OUT[0].w, IMM[0].wwww, IN[1].wwww 57: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %46 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %47 = load <32 x i8>, <32 x i8> addrspace(2)* %46, align 32, !tbaa !0 %48 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %51 = bitcast <8 x i32> addrspace(2)* %50 to <32 x i8> addrspace(2)* %52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %54 = bitcast <4 x i32> addrspace(2)* %53 to <16 x i8> addrspace(2)* %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %57 = bitcast <8 x i32> addrspace(2)* %56 to <32 x i8> addrspace(2)* %58 = load <32 x i8>, <32 x i8> addrspace(2)* %57, align 32, !tbaa !0 %59 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %60 = bitcast <4 x i32> addrspace(2)* %59 to <16 x i8> addrspace(2)* %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)* %64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)* %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %77 = bitcast float %75 to i32 %78 = bitcast float %76 to i32 %79 = insertelement <2 x i32> undef, i32 %77, i32 0 %80 = insertelement <2 x i32> %79, i32 %78, i32 1 %81 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %80, <32 x i8> %47, <16 x i8> %49, i32 2) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = extractelement <4 x float> %81, i32 2 %85 = fmul float %82, 2.000000e+00 %86 = fadd float %85, -1.000000e+00 %87 = fmul float %83, 2.000000e+00 %88 = fadd float %87, -1.000000e+00 %89 = fmul float %84, 2.000000e+00 %90 = fadd float %89, -2.000000e+00 %91 = fmul float %82, 2.000000e+00 %92 = fadd float %91, -1.000000e+00 %93 = fmul float %83, 2.000000e+00 %94 = fadd float %93, -1.000000e+00 %95 = fmul float %84, 2.000000e+00 %96 = fadd float %95, -1.000000e+00 %97 = bitcast float %75 to i32 %98 = bitcast float %76 to i32 %99 = insertelement <2 x i32> undef, i32 %97, i32 0 %100 = insertelement <2 x i32> %99, i32 %98, i32 1 %101 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %100, <32 x i8> %52, <16 x i8> %55, i32 2) %102 = extractelement <4 x float> %101, i32 0 %103 = extractelement <4 x float> %101, i32 1 %104 = fmul float %102, %86 %105 = fadd float %104, 0.000000e+00 %106 = fmul float %102, %88 %107 = fadd float %106, 0.000000e+00 %108 = fmul float %102, %90 %109 = fadd float %108, 1.000000e+00 %110 = fmul float %105, %105 %111 = fmul float %107, %107 %112 = fadd float %111, %110 %113 = fmul float %109, %109 %114 = fadd float %112, %113 %115 = call float @llvm.AMDGPU.rsq.clamped.f32(float %114) %116 = call float @llvm.minnum.f32(float %115, float 0x47EFFFFFE0000000) %117 = fmul float %105, %116 %118 = fmul float %107, %116 %119 = fmul float %109, %116 %120 = fmul float %72, %72 %121 = fmul float %73, %73 %122 = fadd float %121, %120 %123 = fmul float %74, %74 %124 = fadd float %122, %123 %125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124) %126 = call float @llvm.minnum.f32(float %125, float 0x47EFFFFFE0000000) %127 = fmul float %72, %126 %128 = fmul float %73, %126 %129 = fmul float %74, %126 %130 = fmul float %117, %127 %131 = fmul float %118, %128 %132 = fadd float %131, %130 %133 = fmul float %119, %129 %134 = fadd float %132, %133 %135 = fmul float %134, %117 %136 = fmul float %134, %118 %137 = fmul float %134, %119 %138 = fmul float %135, 2.000000e+00 %139 = fsub float %138, %127 %140 = fmul float %136, 2.000000e+00 %141 = fsub float %140, %128 %142 = fmul float %137, 2.000000e+00 %143 = fsub float %142, %129 %144 = fmul float %127, %92 %145 = fmul float %128, %94 %146 = fadd float %145, %144 %147 = fmul float %129, %96 %148 = fadd float %146, %147 %149 = call float @llvm.AMDIL.clamp.(float %148, float 0.000000e+00, float 1.000000e+00) %150 = insertelement <4 x float> undef, float %139, i32 0 %151 = insertelement <4 x float> %150, float %141, i32 1 %152 = insertelement <4 x float> %151, float %143, i32 2 %153 = insertelement <4 x float> %152, float 0.000000e+00, i32 3 %154 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %153) %155 = extractelement <4 x float> %154, i32 0 %156 = extractelement <4 x float> %154, i32 1 %157 = extractelement <4 x float> %154, i32 2 %158 = extractelement <4 x float> %154, i32 3 %159 = call float @fabs(float %157) %160 = fdiv float 1.000000e+00, %159 %161 = fmul float %155, %160 %162 = fadd float %161, 1.500000e+00 %163 = fmul float %156, %160 %164 = fadd float %163, 1.500000e+00 %165 = bitcast float %164 to i32 %166 = bitcast float %162 to i32 %167 = bitcast float %158 to i32 %168 = insertelement <4 x i32> undef, i32 %165, i32 0 %169 = insertelement <4 x i32> %168, i32 %166, i32 1 %170 = insertelement <4 x i32> %169, i32 %167, i32 2 %171 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %170, <32 x i8> %64, <16 x i8> %67, i32 4) %172 = extractelement <4 x float> %171, i32 0 %173 = extractelement <4 x float> %171, i32 1 %174 = extractelement <4 x float> %171, i32 2 %175 = fmul float %172, %102 %176 = fmul float %173, %102 %177 = fmul float %174, %102 %178 = fmul float %175, %35 %179 = fmul float %176, %35 %180 = fmul float %177, %35 %181 = fmul float %149, %149 %182 = fmul float %149, %181 %183 = fadd float %149, 0xBEB0C6F7A0000000 %184 = fmul float %178, %182 %185 = fmul float %179, %182 %186 = fmul float %180, %182 %187 = fmul float %184, %103 %188 = fmul float %185, %103 %189 = fmul float %186, %103 %190 = fmul float %103, %38 %191 = fmul float %190, %34 %192 = fadd float %191, %31 %193 = fmul float %190, %34 %194 = fadd float %193, %32 %195 = fmul float %190, %34 %196 = fadd float %195, %33 %197 = call float @llvm.AMDGPU.cndlt(float %183, float 0.000000e+00, float %187) %198 = call float @llvm.AMDGPU.cndlt(float %183, float 0.000000e+00, float %188) %199 = call float @llvm.AMDGPU.cndlt(float %183, float 0.000000e+00, float %189) %200 = bitcast float %75 to i32 %201 = bitcast float %76 to i32 %202 = insertelement <2 x i32> undef, i32 %200, i32 0 %203 = insertelement <2 x i32> %202, i32 %201, i32 1 %204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %203, <32 x i8> %58, <16 x i8> %61, i32 2) %205 = extractelement <4 x float> %204, i32 0 %206 = extractelement <4 x float> %204, i32 1 %207 = extractelement <4 x float> %204, i32 2 %208 = fmul float %36, %205 %209 = fadd float %208, %197 %210 = fmul float %36, %206 %211 = fadd float %210, %198 %212 = fmul float %36, %207 %213 = fadd float %212, %199 %214 = fmul float %209, %37 %215 = fmul float %211, %37 %216 = fmul float %213, %37 %217 = fsub float 1.000000e+00, %24 %218 = fsub float 1.000000e+00, %25 %219 = fsub float 1.000000e+00, %26 %220 = fmul float %214, %217 %221 = fmul float %215, %218 %222 = fmul float %216, %219 %223 = fmul float %220, %30 %224 = fadd float %223, %27 %225 = fmul float %221, %30 %226 = fadd float %225, %28 %227 = fmul float %222, %30 %228 = fadd float %227, %29 %229 = fmul float %68, %68 %230 = fmul float %69, %69 %231 = fadd float %230, %229 %232 = fmul float %70, %70 %233 = fadd float %231, %232 %234 = call float @llvm.AMDGPU.rsq.clamped.f32(float %233) %235 = call float @llvm.minnum.f32(float %234, float 0x47EFFFFFE0000000) %236 = fmul float %68, %235 %237 = fmul float %69, %235 %238 = fmul float %70, %235 %239 = fmul float %117, %236 %240 = fmul float %118, %237 %241 = fadd float %240, %239 %242 = fmul float %119, %238 %243 = fadd float %241, %242 %244 = call float @llvm.AMDIL.clamp.(float %243, float 0.000000e+00, float 1.000000e+00) %245 = fmul float %139, %236 %246 = fmul float %141, %237 %247 = fadd float %246, %245 %248 = fmul float %143, %238 %249 = fadd float %247, %248 %250 = call float @llvm.AMDIL.clamp.(float %249, float 0.000000e+00, float 1.000000e+00) %251 = fmul float %244, %224 %252 = fmul float %244, %226 %253 = fmul float %244, %228 %254 = fadd float %244, 0xBEB0C6F7A0000000 %255 = call float @llvm.AMDGPU.cndlt(float %254, float 0.000000e+00, float %251) %256 = call float @llvm.AMDGPU.cndlt(float %254, float 0.000000e+00, float %252) %257 = call float @llvm.AMDGPU.cndlt(float %254, float 0.000000e+00, float %253) %258 = call float @fabs(float %250) %259 = call float @llvm.pow.f32(float %258, float %39) %260 = fadd float %250, 0xBEB0C6F7A0000000 %261 = fadd float %39, 8.000000e+00 %262 = fmul float %259, %261 %263 = fmul float %262, 0x3FA45F3060000000 %264 = fmul float %192, %263 %265 = fmul float %194, %263 %266 = fmul float %196, %263 %267 = call float @llvm.AMDGPU.cndlt(float %260, float 0.000000e+00, float %264) %268 = call float @llvm.AMDGPU.cndlt(float %260, float 0.000000e+00, float %265) %269 = call float @llvm.AMDGPU.cndlt(float %260, float 0.000000e+00, float %266) %270 = fadd float %267, %255 %271 = fadd float %268, %256 %272 = fadd float %269, %257 %273 = fmul float %270, %40 %274 = fadd float %273, %24 %275 = fmul float %271, %41 %276 = fadd float %275, %25 %277 = fmul float %272, %42 %278 = fadd float %277, %26 %279 = fmul float %224, %43 %280 = fadd float %279, %274 %281 = fmul float %226, %44 %282 = fadd float %281, %276 %283 = fmul float %228, %45 %284 = fadd float %283, %278 %285 = fmul float %71, 3.906250e-03 %286 = call i32 @llvm.SI.packf16(float %280, float %282) %287 = bitcast i32 %286 to float %288 = call i32 @llvm.SI.packf16(float %284, float %285) %289 = bitcast i32 %288 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %287, float %289, float %287, float %289) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 s_load_dwordx8 s[48:55], s[6:7], 0x8 ; C0D80708 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 s_load_dwordx8 s[24:31], s[6:7], 0x18 ; C0CC0718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[40:47], s[32:35] ; F0800700 010A0B09 image_sample v[0:1], 3, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[48:55], s[36:39] ; F0800300 012C0009 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v14, 2.0, v13, -2.0 ; D282000E 03D61AF4 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v0, v14, 1.0 ; D282000E 03CA1D00 v_mad_f32 v11, 2.0, v11, -1.0 ; D282000B 03CE16F4 v_mad_f32 v12, 2.0, v12, -1.0 ; D282000C 03CE18F4 v_mad_f32 v15, v0, v11, 0 ; D282000F 02021700 v_mad_f32 v16, v0, v12, 0 ; D2820010 02021900 v_mul_f32_e32 v17, v15, v15 ; 10221F0F v_mad_f32 v17, v16, v16, v17 ; D2820011 04462110 v_mad_f32 v17, v14, v14, v17 ; D2820011 04461D0E v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_min_f32_e32 v17, 0x7f7fffff, v17 ; 1E2222FF 7F7FFFFF v_mul_f32_e32 v18, v6, v6 ; 10240D06 v_mad_f32 v18, v7, v7, v18 ; D2820012 044A0F07 v_mad_f32 v18, v8, v8, v18 ; D2820012 044A1108 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mul_f32_e32 v15, v17, v15 ; 101E1F11 v_mul_f32_e32 v16, v17, v16 ; 10202111 v_mul_f32_e32 v14, v17, v14 ; 101C1D11 v_min_f32_e32 v17, 0x7f7fffff, v18 ; 1E2224FF 7F7FFFFF v_mul_f32_e32 v18, v17, v6 ; 10240D11 v_mul_f32_e32 v19, v17, v7 ; 10260F11 v_mul_f32_e32 v20, v18, v15 ; 10281F12 v_mad_f32 v20, v16, v19, v20 ; D2820014 04522710 v_mul_f32_e32 v21, v17, v8 ; 102A1111 v_mad_f32 v20, v14, v21, v20 ; D2820014 04522B0E v_mul_f32_e32 v22, v15, v20 ; 102C290F v_mad_f32 v22, v20, v15, v22 ; D2820016 045A1F14 v_mov_b32_e32 v26, 0 ; 7E340280 v_mad_f32 v23, -v6, v17, v22 ; D2820017 245A2306 v_mul_f32_e32 v6, v16, v20 ; 100C2910 v_mad_f32 v6, v20, v16, v6 ; D2820006 041A2114 v_mad_f32 v24, -v7, v17, v6 ; D2820018 241A2307 v_mul_f32_e32 v6, v14, v20 ; 100C290E v_mad_f32 v6, v20, v14, v6 ; D2820006 041A1D14 v_mad_f32 v25, -v8, v17, v6 ; D2820019 241A2308 v_cubeid_f32 v29, v23, v24, v25 ; D288001D 04663117 v_cubema_f32 v28, v23, v24, v25 ; D28E001C 04663117 v_cubesc_f32 v27, v23, v24, v25 ; D28A001B 04663117 v_cubetc_f32 v26, v23, v24, v25 ; D28C001A 04663117 v_mul_f32_e32 v6, v2, v2 ; 100C0502 v_mad_f32 v6, v3, v3, v6 ; D2820006 041A0703 v_rcp_f32_e64 v7, |v28| ; D3540107 0000011C v_mad_f32 v6, v4, v4, v6 ; D2820006 041A0904 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mov_b32_e32 v8, 0x3fc00000 ; 7E1002FF 3FC00000 v_mad_f32 v28, v26, v7, v8 ; D282001C 04220F1A v_mad_f32 v27, v27, v7, v8 ; D282001B 04220F1B image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[24:31], s[12:15] ; F0800700 00661A1B v_min_f32_e32 v6, 0x7f7fffff, v6 ; 1E0C0CFF 7F7FFFFF v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v6, v2, v23 ; 100C2F02 v_mad_f32 v6, v24, v3, v6 ; D2820006 041A0718 v_mad_f32 v6, v25, v4, v6 ; D2820006 041A0919 v_mad_f32 v7, 2.0, v13, -1.0 ; D2820007 03CE1AF4 image_sample v[8:10], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[16:23], s[8:11] ; F0800700 00440809 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 v_mul_f32_e32 v11, v11, v18 ; 1016250B v_mad_f32 v11, v19, v12, v11 ; D282000B 042E1913 v_mad_f32 v7, v21, v7, v11 ; D2820007 042E0F15 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v11, v0, v26 ; 10163500 v_mul_f32_e32 v12, v0, v27 ; 10183700 v_mul_f32_e32 v0, v0, v28 ; 10003900 s_buffer_load_dword s5, s[0:3], 0x19 ; C2028119 s_buffer_load_dword s6, s[0:3], 0x1a ; C203011A s_buffer_load_dword s7, s[0:3], 0x1b ; C203811B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v11, s4, v11 ; 10161604 v_mul_f32_e32 v12, s4, v12 ; 10181804 v_mul_f32_e32 v0, s4, v0 ; 10000004 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_mul_f32_e32 v13, v7, v7 ; 101A0F07 v_mul_f32_e32 v13, v13, v7 ; 101A0F0D v_mul_f32_e32 v11, v13, v11 ; 1016170D v_mul_f32_e32 v12, v13, v12 ; 1018190D v_mul_f32_e32 v0, v13, v0 ; 1000010D v_mov_b32_e32 v13, 0xb58637bd ; 7E1A02FF B58637BD v_add_f32_e32 v7, v13, v7 ; 060E0F0D v_mul_f32_e32 v11, v1, v11 ; 10161701 v_mul_f32_e32 v12, v1, v12 ; 10181901 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v7, v11, 0, vcc ; D2000007 01A9010B v_cndmask_b32_e64 v11, v12, 0, vcc ; D200000B 01A9010C v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mad_f32 v7, s5, v8, v7 ; D2820007 041E1005 v_mad_f32 v8, s5, v9, v11 ; D2820008 042E1205 v_mad_f32 v0, s5, v10, v0 ; D2820000 04021405 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_buffer_load_dword s5, s[0:3], 0x12 ; C2028112 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s9, s[0:3], 0x14 ; C2048114 s_buffer_load_dword s10, s[0:3], 0x15 ; C2050115 s_buffer_load_dword s11, s[0:3], 0x16 ; C2058116 s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 s_buffer_load_dword s14, s[0:3], 0x2 ; C2070102 s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s17, s[0:3], 0x1c ; C208811C s_buffer_load_dword s18, s[0:3], 0x20 ; C2090120 s_buffer_load_dword s19, s[0:3], 0x21 ; C2098121 s_buffer_load_dword s20, s[0:3], 0x22 ; C20A0122 s_buffer_load_dword s21, s[0:3], 0x24 ; C20A8124 s_buffer_load_dword s22, s[0:3], 0x25 ; C20B0125 s_buffer_load_dword s2, s[0:3], 0x26 ; C2010126 v_mul_f32_e32 v1, s7, v1 ; 10020207 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v9, s9 ; 7E120209 v_mov_b32_e32 v10, s10 ; 7E14020A v_mov_b32_e32 v11, s11 ; 7E16020B v_mad_f32 v9, s4, v1, v9 ; D2820009 04260204 v_mad_f32 v10, s4, v1, v10 ; D282000A 042A0204 v_mad_f32 v1, s4, v1, v11 ; D2820001 042E0204 v_mul_f32_e32 v7, s6, v7 ; 100E0E06 v_mul_f32_e32 v8, s6, v8 ; 10101006 v_mul_f32_e32 v0, s6, v0 ; 10000006 v_sub_f32_e64 v11, 1.0, s12 ; D208000B 000018F2 v_mul_f32_e32 v7, v11, v7 ; 100E0F0B v_sub_f32_e64 v11, 1.0, s13 ; D208000B 00001AF2 v_mul_f32_e32 v8, v11, v8 ; 1010110B v_sub_f32_e64 v11, 1.0, s14 ; D208000B 00001CF2 v_mul_f32_e32 v0, v11, v0 ; 1000010B v_mov_b32_e32 v11, s15 ; 7E16020F v_mad_f32 v7, s8, v7, v11 ; D2820007 042E0E08 v_mov_b32_e32 v11, s16 ; 7E160210 v_mad_f32 v8, s8, v8, v11 ; D2820008 042E1008 v_mov_b32_e32 v11, s5 ; 7E160205 v_mad_f32 v0, s8, v0, v11 ; D2820000 042E0008 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_and_b32_e32 v11, 0x7fffffff, v6 ; 36160CFF 7FFFFFFF v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_f32_e32 v2, v2, v15 ; 10041F02 v_mad_f32 v2, v16, v3, v2 ; D2820002 040A0710 v_mad_f32 v2, v14, v4, v2 ; D2820002 040A090E v_mul_legacy_f32_e32 v3, s17, v11 ; 0E061611 v_mov_b32_e32 v4, 0x41000000 ; 7E0802FF 41000000 v_add_f32_e32 v4, s17, v4 ; 06080811 v_exp_f32_e32 v3, v3 ; 7E064B03 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_add_f32_e32 v4, v13, v6 ; 06080D0D v_add_f32_e32 v6, v13, v2 ; 060C050D v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_mul_f32_e32 v6, v7, v2 ; 100C0507 v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_mul_f32_e32 v3, 0x3d22f983, v3 ; 100606FF 3D22F983 v_mul_f32_e32 v9, v3, v9 ; 10121303 v_cmp_gt_f32_e64 s[0:1], 0, v4 ; D0080000 00020880 v_cndmask_b32_e64 v4, v9, 0, s[0:1] ; D2000004 00010109 v_add_f32_e32 v4, v6, v4 ; 06080906 v_mov_b32_e32 v6, s12 ; 7E0C020C v_mad_f32 v4, s18, v4, v6 ; D2820004 041A0812 v_mul_f32_e32 v6, v8, v2 ; 100C0508 v_mul_f32_e32 v2, v0, v2 ; 10040500 v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_cndmask_b32_e64 v2, v2, 0, vcc ; D2000002 01A90102 v_mul_f32_e32 v9, v3, v10 ; 10121503 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mov_b32_e32 v3, s13 ; 7E06020D v_cndmask_b32_e64 v9, v9, 0, s[0:1] ; D2000009 00010109 v_add_f32_e32 v6, v6, v9 ; 060C1306 v_mad_f32 v3, s19, v6, v3 ; D2820003 040E0C13 v_cndmask_b32_e64 v1, v1, 0, s[0:1] ; D2000001 00010101 v_add_f32_e32 v1, v2, v1 ; 06020302 v_mov_b32_e32 v2, s14 ; 7E04020E v_mad_f32 v1, s20, v1, v2 ; D2820001 040A0214 v_mad_f32 v2, v7, s21, v4 ; D2820002 04102B07 v_mad_f32 v3, v8, s22, v3 ; D2820003 040C2D08 v_mad_f32 v0, v0, s2, v1 ; D2820000 04040500 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_mul_f32_e32 v2, 0x3b800000, v5 ; 10040AFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 32 Code Size: 1160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], COLOR, PERSPECTIVE DCL IN[1], COLOR[1], PERSPECTIVE DCL IN[2], TEXCOORD[0], PERSPECTIVE DCL IN[3], TEXCOORD[4], PERSPECTIVE DCL IN[4], TEXCOORD[5], PERSPECTIVE DCL IN[5], TEXCOORD[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL CONST[0..25] DCL TEMP[0..4] DCL TEMP[5], LOCAL DCL TEMP[6..8] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[1] FLT32 { 0.3000, 0.5900, 0.1100, 0.8581} IMM[2] FLT32 { -0.8581, 0.2477, 0.4290, 0.8862} IMM[3] FLT32 { 8.0000, 0.0398, 0.0039, 0.0000} IMM[4] FLT32 { 0.0000, 1.0000, 0.5000, -0.0000} IMM[5] FLT32 { 3.0000, -1.0000, -1.0233, 1.0233} IMM[6] FLT32 {-340282346638528859811704183484516925440.0000, 340282346638528859811704183484516925440.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[2], SAMP[4], 2D 1: ADD TEMP[0].x, -TEMP[0].yyyy, IMM[0].zzzz 2: TEX TEMP[1], IN[2], SAMP[2], 2D 3: DP3 TEMP[0].y, TEMP[1].xxxx, IMM[1] 4: ADD TEMP[0].y, -TEMP[0].yyyy, IMM[0].zzzz 5: POW TEMP[1].w, |TEMP[0].yyyy|, CONST[21].yyyy 6: ADD TEMP[0].y, |TEMP[0].yyyy|, IMM[4].wwww 7: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].wwww 8: MUL TEMP[0].xzw, TEMP[0].xxxx, CONST[14].xyyz 9: CMP TEMP[0].xyz, TEMP[0].yyyy, IMM[4].xxxx, TEMP[0].xzww 10: ADD TEMP[2].xyz, TEMP[0], TEMP[0] 11: ABS TEMP[3].xyz, CONST[15] 12: MAX TEMP[4].xyz, TEMP[3], -IMM[4].wwww 13: LG2 TEMP[5].x, |TEMP[4].xxxx| 14: MAX TEMP[3].x, IMM[6].xxxx, TEMP[5].xxxx 15: LG2 TEMP[5].x, |TEMP[4].yyyy| 16: MAX TEMP[3].y, IMM[6].xxxx, TEMP[5].xxxx 17: LG2 TEMP[5].x, |TEMP[4].zzzz| 18: MAX TEMP[3].z, IMM[6].xxxx, TEMP[5].xxxx 19: MUL TEMP[3].xyz, TEMP[3], CONST[21].zzzz 20: EX2 TEMP[4].x, TEMP[3].xxxx 21: EX2 TEMP[4].y, TEMP[3].yyyy 22: EX2 TEMP[4].z, TEMP[3].zzzz 23: TEX TEMP[3], IN[2], SAMP[1], 2D 24: MAD TEMP[0].xyz, TEMP[3].zzzz, TEMP[4], -TEMP[0] 25: MAD TEMP[0].xyz, CONST[21].wwww, TEMP[0], TEMP[2] 26: MOV TEMP[2].xy, IN[2] 27: MUL TEMP[2].xy, TEMP[2], IN[1].wwww 28: MAD TEMP[2].xy, TEMP[2], IMM[0].wwww, IMM[0].wwww 29: MOV TEMP[4].xyz, IMM[4] 30: MAD TEMP[4], CONST[19].yyyy, TEMP[4].xyyx, TEMP[4].yxxz 31: MUL TEMP[2].xy, TEMP[2], TEMP[4] 32: MAD TEMP[2].xy, TEMP[4].zwzw, TEMP[2], CONST[13] 33: TEX TEMP[2], TEMP[2], SAMP[3], 2D 34: ADD_SAT TEMP[0].w, TEMP[2].xxxx, -CONST[20].zzzz 35: MUL_SAT TEMP[0].w, TEMP[0].wwww, CONST[21].xxxx 36: MUL TEMP[2].xyz, TEMP[1], TEMP[3].zzzz 37: MUL TEMP[2].xyz, TEMP[2], CONST[18].yyyy 38: MUL TEMP[2].xyz, TEMP[0].wwww, TEMP[2] 39: MAD TEMP[0].xyz, CONST[17].wwww, -TEMP[2], TEMP[0] 40: MUL TEMP[2].xyz, TEMP[2], CONST[17].wwww 41: MAD TEMP[0].xyz, CONST[22].xxxx, TEMP[0], TEMP[2] 42: ADD TEMP[0].xyz, TEMP[0], CONST[0] 43: TEX TEMP[2], IN[2], SAMP[6], 2D 44: LRP TEMP[4].xyz, TEMP[0].wwww, TEMP[1], TEMP[2] 45: MUL TEMP[1].xyz, CONST[16], CONST[16].wwww 46: DP3 TEMP[5].x, IN[5], IN[5] 47: RSQ TEMP[5].x, TEMP[5].xxxx 48: MIN TEMP[5].x, IMM[6].yyyy, TEMP[5].xxxx 49: MUL TEMP[2].xyz, IN[5], TEMP[5].xxxx 50: TEX TEMP[6], IN[2], SAMP[0], 2D 51: MAD TEMP[6].xyz, TEMP[6], IMM[0].xxxx, IMM[0].yyyy 52: DP3 TEMP[5].x, TEMP[6], TEMP[6] 53: RSQ TEMP[5].x, TEMP[5].xxxx 54: MIN TEMP[5].x, IMM[6].yyyy, TEMP[5].xxxx 55: MUL TEMP[7].xyz, TEMP[6], TEMP[5].xxxx 56: DP3 TEMP[0].w, TEMP[7], TEMP[2] 57: MUL TEMP[6].xyz, TEMP[0].wwww, TEMP[7] 58: MAD TEMP[2].xyz, TEMP[6], IMM[0].xxxx, -TEMP[2] 59: TEX TEMP[6], TEMP[2], SAMP[5], 2D 60: MUL TEMP[1].xyz, TEMP[1], TEMP[6] 61: MAD TEMP[1].xyz, TEMP[3].xxxx, TEMP[1], TEMP[4] 62: MUL TEMP[1].xyz, TEMP[1], CONST[22].yyyy 63: MUL TEMP[1].xyz, TEMP[1], CONST[22].zzzz 64: MOV TEMP[3].z, IMM[0].zzzz 65: ADD TEMP[3].xzw, TEMP[3].zzzz, -CONST[0].xyyz 66: MUL TEMP[1].xyz, TEMP[1], TEMP[3].xzww 67: MAD TEMP[1].xyz, TEMP[1], CONST[4].wwww, CONST[4] 68: DP3 TEMP[5].x, IN[1], IN[1] 69: RSQ TEMP[5].x, TEMP[5].xxxx 70: MIN TEMP[5].x, IMM[6].yyyy, TEMP[5].xxxx 71: MUL TEMP[4].xyz, IN[1], TEMP[5].xxxx 72: DP3 TEMP[5].x, IN[0], IN[0] 73: RSQ TEMP[5].x, TEMP[5].xxxx 74: MIN TEMP[5].x, IMM[6].yyyy, TEMP[5].xxxx 75: MUL TEMP[6].xyz, IN[0], TEMP[5].xxxx 76: MUL TEMP[3].xzw, TEMP[4].zyxy, TEMP[6].yyzx 77: MAD TEMP[3].xzw, TEMP[4].yyzx, TEMP[6].zyxy, -TEMP[3] 78: DP3 TEMP[4].y, TEMP[4], TEMP[7] 79: DP3 TEMP[4].z, TEMP[6], TEMP[7] 80: MUL TEMP[3].xzw, TEMP[3], IN[1].wwww 81: DP3 TEMP[4].x, TEMP[3].xzww, TEMP[7] 82: MUL TEMP[6].xz, TEMP[4], TEMP[4].yyyy 83: MUL TEMP[3].xzw, TEMP[4].xyyz, TEMP[4].xyyx 84: MUL TEMP[8].xyz, TEMP[4], IMM[5].zwzw 85: MAD TEMP[6].w, TEMP[4].zzzz, TEMP[4].zzzz, -TEMP[3].xxxx 86: MAD TEMP[6].y, TEMP[3].zzzz, IMM[5].xxxx, IMM[5].yyyy 87: MUL TEMP[8].w, TEMP[3].wwww, IMM[1].wwww 88: MUL TEMP[4], TEMP[6], IMM[2].xyxz 89: DP4 TEMP[0].w, CONST[8], TEMP[4] 90: DP4 TEMP[1].w, CONST[7], TEMP[8] 91: MOV TEMP[2].w, IMM[2].wwww 92: MAD TEMP[1].w, CONST[6].xxxx, TEMP[2].wwww, TEMP[1].wwww 93: ADD TEMP[6].x, TEMP[0].wwww, TEMP[1].wwww 94: DP4 TEMP[0].w, CONST[10], TEMP[4] 95: DP4 TEMP[1].w, CONST[12], TEMP[4] 96: DP4 TEMP[3].x, CONST[9], TEMP[8] 97: DP4 TEMP[3].z, CONST[11], TEMP[8] 98: MAD TEMP[3].z, CONST[6].zzzz, TEMP[2].wwww, TEMP[3].zzzz 99: ADD TEMP[6].z, TEMP[1].wwww, TEMP[3].zzzz 100: MAD TEMP[1].w, CONST[6].yyyy, TEMP[2].wwww, TEMP[3].xxxx 101: ADD TEMP[6].y, TEMP[0].wwww, TEMP[1].wwww 102: MUL TEMP[3].xzw, TEMP[1].xyyz, TEMP[6].xyyz 103: CMP TEMP[3].xzw, TEMP[6].xyyz, IMM[4].xxxx, TEMP[3] 104: ADD TEMP[0].xyz, TEMP[0], TEMP[3].xzww 105: DP3 TEMP[5].x, IN[3], IN[3] 106: RSQ TEMP[5].x, TEMP[5].xxxx 107: MIN TEMP[5].x, IMM[6].yyyy, TEMP[5].xxxx 108: MUL TEMP[4].xyz, IN[3], TEMP[5].xxxx 109: DP3_SAT TEMP[0].w, TEMP[2], TEMP[4] 110: DP3_SAT TEMP[1].w, TEMP[7], TEMP[4] 111: MUL TEMP[2].x, TEMP[3].yyyy, CONST[23].xxxx 112: MOV TEMP[3].x, IMM[3].xxxx 113: MAD TEMP[2].y, TEMP[3].yyyy, CONST[23].xxxx, TEMP[3].xxxx 114: POW TEMP[3].x, |TEMP[0].wwww|, TEMP[2].xxxx 115: ADD TEMP[0].w, TEMP[0].wwww, IMM[4].wwww 116: MUL TEMP[2].x, TEMP[2].yyyy, TEMP[3].xxxx 117: MUL TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy 118: TEX TEMP[3], IN[2], SAMP[7], 2D 119: MUL TEMP[2].yzw, TEMP[3].xxyz, CONST[22].wwww 120: MUL TEMP[2].yzw, TEMP[2], CONST[22].yyyy 121: MAD TEMP[2].yzw, TEMP[2], CONST[5].wwww, CONST[5].xxyz 122: MUL TEMP[2].xyz, TEMP[2].yzww, TEMP[2].xxxx 123: CMP TEMP[2].xyz, TEMP[0].wwww, IMM[4].xxxx, TEMP[2] 124: MUL TEMP[3].xyz, TEMP[1].wwww, TEMP[1] 125: ADD TEMP[0].w, TEMP[1].wwww, IMM[4].wwww 126: CMP TEMP[3].xyz, TEMP[0].wwww, IMM[4].xxxx, TEMP[3] 127: ADD TEMP[2].xyz, TEMP[2], TEMP[3] 128: MAD TEMP[0].xyz, TEMP[2], CONST[24], TEMP[0] 129: MAD OUT[0].xyz, TEMP[1], CONST[25], TEMP[0] 130: MUL OUT[0].w, IMM[3].zzzz, IN[4].wwww 131: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 284) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 348) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360) %85 = call float @llvm.SI.load.const(<16 x i8> %23, i32 364) %86 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %87 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %88 = call float @llvm.SI.load.const(<16 x i8> %23, i32 388) %89 = call float @llvm.SI.load.const(<16 x i8> %23, i32 392) %90 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400) %91 = call float @llvm.SI.load.const(<16 x i8> %23, i32 404) %92 = call float @llvm.SI.load.const(<16 x i8> %23, i32 408) %93 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %94 = load <32 x i8>, <32 x i8> addrspace(2)* %93, align 32, !tbaa !0 %95 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %98 = bitcast <8 x i32> addrspace(2)* %97 to <32 x i8> addrspace(2)* %99 = load <32 x i8>, <32 x i8> addrspace(2)* %98, align 32, !tbaa !0 %100 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %101 = bitcast <4 x i32> addrspace(2)* %100 to <16 x i8> addrspace(2)* %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %104 = bitcast <8 x i32> addrspace(2)* %103 to <32 x i8> addrspace(2)* %105 = load <32 x i8>, <32 x i8> addrspace(2)* %104, align 32, !tbaa !0 %106 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %107 = bitcast <4 x i32> addrspace(2)* %106 to <16 x i8> addrspace(2)* %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 %109 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %110 = bitcast <8 x i32> addrspace(2)* %109 to <32 x i8> addrspace(2)* %111 = load <32 x i8>, <32 x i8> addrspace(2)* %110, align 32, !tbaa !0 %112 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %113 = bitcast <4 x i32> addrspace(2)* %112 to <16 x i8> addrspace(2)* %114 = load <16 x i8>, <16 x i8> addrspace(2)* %113, align 16, !tbaa !0 %115 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %116 = bitcast <8 x i32> addrspace(2)* %115 to <32 x i8> addrspace(2)* %117 = load <32 x i8>, <32 x i8> addrspace(2)* %116, align 32, !tbaa !0 %118 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %119 = bitcast <4 x i32> addrspace(2)* %118 to <16 x i8> addrspace(2)* %120 = load <16 x i8>, <16 x i8> addrspace(2)* %119, align 16, !tbaa !0 %121 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %122 = bitcast <8 x i32> addrspace(2)* %121 to <32 x i8> addrspace(2)* %123 = load <32 x i8>, <32 x i8> addrspace(2)* %122, align 32, !tbaa !0 %124 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %125 = bitcast <4 x i32> addrspace(2)* %124 to <16 x i8> addrspace(2)* %126 = load <16 x i8>, <16 x i8> addrspace(2)* %125, align 16, !tbaa !0 %127 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %128 = bitcast <8 x i32> addrspace(2)* %127 to <32 x i8> addrspace(2)* %129 = load <32 x i8>, <32 x i8> addrspace(2)* %128, align 32, !tbaa !0 %130 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %131 = bitcast <4 x i32> addrspace(2)* %130 to <16 x i8> addrspace(2)* %132 = load <16 x i8>, <16 x i8> addrspace(2)* %131, align 16, !tbaa !0 %133 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %134 = bitcast <8 x i32> addrspace(2)* %133 to <32 x i8> addrspace(2)* %135 = load <32 x i8>, <32 x i8> addrspace(2)* %134, align 32, !tbaa !0 %136 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %137 = bitcast <4 x i32> addrspace(2)* %136 to <16 x i8> addrspace(2)* %138 = load <16 x i8>, <16 x i8> addrspace(2)* %137, align 16, !tbaa !0 %139 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %140 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %141 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %142 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %143 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %144 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %145 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %146 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %147 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %148 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %149 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %150 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %151 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %152 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %153 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %154 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %155 = bitcast float %146 to i32 %156 = bitcast float %147 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %117, <16 x i8> %120, i32 2) %160 = extractelement <4 x float> %159, i32 1 %161 = fsub float 1.000000e+00, %160 %162 = bitcast float %146 to i32 %163 = bitcast float %147 to i32 %164 = insertelement <2 x i32> undef, i32 %162, i32 0 %165 = insertelement <2 x i32> %164, i32 %163, i32 1 %166 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %165, <32 x i8> %105, <16 x i8> %108, i32 2) %167 = extractelement <4 x float> %166, i32 0 %168 = extractelement <4 x float> %166, i32 1 %169 = extractelement <4 x float> %166, i32 2 %170 = fmul float %167, 0x3FD3333340000000 %171 = fmul float %167, 0x3FE2E147A0000000 %172 = fadd float %171, %170 %173 = fmul float %167, 0x3FBC28F5C0000000 %174 = fadd float %172, %173 %175 = fsub float 1.000000e+00, %174 %176 = call float @fabs(float %175) %177 = call float @llvm.pow.f32(float %176, float %79) %178 = call float @fabs(float %175) %179 = fadd float %178, 0xBEB0C6F7A0000000 %180 = fmul float %161, %177 %181 = fmul float %180, %64 %182 = fmul float %180, %65 %183 = fmul float %180, %66 %184 = call float @llvm.AMDGPU.cndlt(float %179, float 0.000000e+00, float %181) %185 = call float @llvm.AMDGPU.cndlt(float %179, float 0.000000e+00, float %182) %186 = call float @llvm.AMDGPU.cndlt(float %179, float 0.000000e+00, float %183) %187 = fadd float %184, %184 %188 = fadd float %185, %185 %189 = fadd float %186, %186 %190 = call float @fabs(float %67) %191 = call float @fabs(float %68) %192 = call float @fabs(float %69) %193 = call float @llvm.maxnum.f32(float %190, float 0x3EB0C6F7A0000000) %194 = call float @llvm.maxnum.f32(float %191, float 0x3EB0C6F7A0000000) %195 = call float @llvm.maxnum.f32(float %192, float 0x3EB0C6F7A0000000) %196 = call float @fabs(float %193) %197 = call float @llvm.log2.f32(float %196) %198 = call float @llvm.maxnum.f32(float %197, float 0xC7EFFFFFE0000000) %199 = call float @fabs(float %194) %200 = call float @llvm.log2.f32(float %199) %201 = call float @llvm.maxnum.f32(float %200, float 0xC7EFFFFFE0000000) %202 = call float @fabs(float %195) %203 = call float @llvm.log2.f32(float %202) %204 = call float @llvm.maxnum.f32(float %203, float 0xC7EFFFFFE0000000) %205 = fmul float %198, %80 %206 = fmul float %201, %80 %207 = fmul float %204, %80 %208 = call float @llvm.AMDIL.exp.(float %205) %209 = call float @llvm.AMDIL.exp.(float %206) %210 = call float @llvm.AMDIL.exp.(float %207) %211 = bitcast float %146 to i32 %212 = bitcast float %147 to i32 %213 = insertelement <2 x i32> undef, i32 %211, i32 0 %214 = insertelement <2 x i32> %213, i32 %212, i32 1 %215 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %214, <32 x i8> %99, <16 x i8> %102, i32 2) %216 = extractelement <4 x float> %215, i32 0 %217 = extractelement <4 x float> %215, i32 1 %218 = extractelement <4 x float> %215, i32 2 %219 = fmul float %218, %208 %220 = fsub float %219, %184 %221 = fmul float %218, %209 %222 = fsub float %221, %185 %223 = fmul float %218, %210 %224 = fsub float %223, %186 %225 = fmul float %81, %220 %226 = fadd float %225, %187 %227 = fmul float %81, %222 %228 = fadd float %227, %188 %229 = fmul float %81, %224 %230 = fadd float %229, %189 %231 = fmul float %146, %145 %232 = fmul float %147, %145 %233 = fmul float %231, 5.000000e-01 %234 = fadd float %233, 5.000000e-01 %235 = fmul float %232, 5.000000e-01 %236 = fadd float %235, 5.000000e-01 %237 = fmul float %76, 0.000000e+00 %238 = fadd float %237, 1.000000e+00 %239 = fadd float %76, 0.000000e+00 %240 = fadd float %76, 0.000000e+00 %241 = fmul float %76, 0.000000e+00 %242 = fadd float %241, 5.000000e-01 %243 = fmul float %234, %238 %244 = fmul float %236, %239 %245 = fmul float %240, %243 %246 = fadd float %245, %62 %247 = fmul float %242, %244 %248 = fadd float %247, %63 %249 = bitcast float %246 to i32 %250 = bitcast float %248 to i32 %251 = insertelement <2 x i32> undef, i32 %249, i32 0 %252 = insertelement <2 x i32> %251, i32 %250, i32 1 %253 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %252, <32 x i8> %111, <16 x i8> %114, i32 2) %254 = extractelement <4 x float> %253, i32 0 %255 = fsub float %254, %77 %256 = call float @llvm.AMDIL.clamp.(float %255, float 0.000000e+00, float 1.000000e+00) %257 = fmul float %256, %78 %258 = call float @llvm.AMDIL.clamp.(float %257, float 0.000000e+00, float 1.000000e+00) %259 = fmul float %167, %218 %260 = fmul float %168, %218 %261 = fmul float %169, %218 %262 = fmul float %259, %75 %263 = fmul float %260, %75 %264 = fmul float %261, %75 %265 = fmul float %258, %262 %266 = fmul float %258, %263 %267 = fmul float %258, %264 %268 = fmul float %265, %74 %269 = fsub float %226, %268 %270 = fmul float %266, %74 %271 = fsub float %228, %270 %272 = fmul float %267, %74 %273 = fsub float %230, %272 %274 = fmul float %265, %74 %275 = fmul float %266, %74 %276 = fmul float %267, %74 %277 = fmul float %82, %269 %278 = fadd float %277, %274 %279 = fmul float %82, %271 %280 = fadd float %279, %275 %281 = fmul float %82, %273 %282 = fadd float %281, %276 %283 = fadd float %278, %24 %284 = fadd float %280, %25 %285 = fadd float %282, %26 %286 = bitcast float %146 to i32 %287 = bitcast float %147 to i32 %288 = insertelement <2 x i32> undef, i32 %286, i32 0 %289 = insertelement <2 x i32> %288, i32 %287, i32 1 %290 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %289, <32 x i8> %129, <16 x i8> %132, i32 2) %291 = extractelement <4 x float> %290, i32 0 %292 = extractelement <4 x float> %290, i32 1 %293 = extractelement <4 x float> %290, i32 2 %294 = call float @llvm.AMDGPU.lrp(float %258, float %167, float %291) %295 = call float @llvm.AMDGPU.lrp(float %258, float %168, float %292) %296 = call float @llvm.AMDGPU.lrp(float %258, float %169, float %293) %297 = fmul float %70, %73 %298 = fmul float %71, %73 %299 = fmul float %72, %73 %300 = fmul float %152, %152 %301 = fmul float %153, %153 %302 = fadd float %301, %300 %303 = fmul float %154, %154 %304 = fadd float %302, %303 %305 = call float @llvm.AMDGPU.rsq.clamped.f32(float %304) %306 = call float @llvm.minnum.f32(float %305, float 0x47EFFFFFE0000000) %307 = fmul float %152, %306 %308 = fmul float %153, %306 %309 = fmul float %154, %306 %310 = bitcast float %146 to i32 %311 = bitcast float %147 to i32 %312 = insertelement <2 x i32> undef, i32 %310, i32 0 %313 = insertelement <2 x i32> %312, i32 %311, i32 1 %314 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %313, <32 x i8> %94, <16 x i8> %96, i32 2) %315 = extractelement <4 x float> %314, i32 0 %316 = extractelement <4 x float> %314, i32 1 %317 = extractelement <4 x float> %314, i32 2 %318 = fmul float %315, 2.000000e+00 %319 = fadd float %318, -1.000000e+00 %320 = fmul float %316, 2.000000e+00 %321 = fadd float %320, -1.000000e+00 %322 = fmul float %317, 2.000000e+00 %323 = fadd float %322, -1.000000e+00 %324 = fmul float %319, %319 %325 = fmul float %321, %321 %326 = fadd float %325, %324 %327 = fmul float %323, %323 %328 = fadd float %326, %327 %329 = call float @llvm.AMDGPU.rsq.clamped.f32(float %328) %330 = call float @llvm.minnum.f32(float %329, float 0x47EFFFFFE0000000) %331 = fmul float %319, %330 %332 = fmul float %321, %330 %333 = fmul float %323, %330 %334 = fmul float %331, %307 %335 = fmul float %332, %308 %336 = fadd float %335, %334 %337 = fmul float %333, %309 %338 = fadd float %336, %337 %339 = fmul float %338, %331 %340 = fmul float %338, %332 %341 = fmul float %338, %333 %342 = fmul float %339, 2.000000e+00 %343 = fsub float %342, %307 %344 = fmul float %340, 2.000000e+00 %345 = fsub float %344, %308 %346 = fmul float %341, 2.000000e+00 %347 = fsub float %346, %309 %348 = bitcast float %343 to i32 %349 = bitcast float %345 to i32 %350 = insertelement <2 x i32> undef, i32 %348, i32 0 %351 = insertelement <2 x i32> %350, i32 %349, i32 1 %352 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %351, <32 x i8> %123, <16 x i8> %126, i32 2) %353 = extractelement <4 x float> %352, i32 0 %354 = extractelement <4 x float> %352, i32 1 %355 = extractelement <4 x float> %352, i32 2 %356 = fmul float %297, %353 %357 = fmul float %298, %354 %358 = fmul float %299, %355 %359 = fmul float %216, %356 %360 = fadd float %359, %294 %361 = fmul float %216, %357 %362 = fadd float %361, %295 %363 = fmul float %216, %358 %364 = fadd float %363, %296 %365 = fmul float %360, %83 %366 = fmul float %362, %83 %367 = fmul float %364, %83 %368 = fmul float %365, %84 %369 = fmul float %366, %84 %370 = fmul float %367, %84 %371 = fsub float 1.000000e+00, %24 %372 = fsub float 1.000000e+00, %25 %373 = fsub float 1.000000e+00, %26 %374 = fmul float %368, %371 %375 = fmul float %369, %372 %376 = fmul float %370, %373 %377 = fmul float %374, %30 %378 = fadd float %377, %27 %379 = fmul float %375, %30 %380 = fadd float %379, %28 %381 = fmul float %376, %30 %382 = fadd float %381, %29 %383 = fmul float %142, %142 %384 = fmul float %143, %143 %385 = fadd float %384, %383 %386 = fmul float %144, %144 %387 = fadd float %385, %386 %388 = call float @llvm.AMDGPU.rsq.clamped.f32(float %387) %389 = call float @llvm.minnum.f32(float %388, float 0x47EFFFFFE0000000) %390 = fmul float %142, %389 %391 = fmul float %143, %389 %392 = fmul float %144, %389 %393 = fmul float %139, %139 %394 = fmul float %140, %140 %395 = fadd float %394, %393 %396 = fmul float %141, %141 %397 = fadd float %395, %396 %398 = call float @llvm.AMDGPU.rsq.clamped.f32(float %397) %399 = call float @llvm.minnum.f32(float %398, float 0x47EFFFFFE0000000) %400 = fmul float %139, %399 %401 = fmul float %140, %399 %402 = fmul float %141, %399 %403 = fmul float %392, %401 %404 = fmul float %390, %402 %405 = fmul float %391, %400 %406 = fmul float %391, %402 %407 = fsub float %406, %403 %408 = fmul float %392, %400 %409 = fsub float %408, %404 %410 = fmul float %390, %401 %411 = fsub float %410, %405 %412 = fmul float %390, %331 %413 = fmul float %391, %332 %414 = fadd float %413, %412 %415 = fmul float %392, %333 %416 = fadd float %414, %415 %417 = fmul float %400, %331 %418 = fmul float %401, %332 %419 = fadd float %418, %417 %420 = fmul float %402, %333 %421 = fadd float %419, %420 %422 = fmul float %407, %145 %423 = fmul float %409, %145 %424 = fmul float %411, %145 %425 = fmul float %422, %331 %426 = fmul float %423, %332 %427 = fadd float %426, %425 %428 = fmul float %424, %333 %429 = fadd float %427, %428 %430 = fmul float %429, %416 %431 = fmul float %421, %416 %432 = fmul float %429, %429 %433 = fmul float %416, %416 %434 = fmul float %421, %429 %435 = fmul float %429, 0xBFF05F8CE0000000 %436 = fmul float %416, 0x3FF05F8CE0000000 %437 = fmul float %421, 0xBFF05F8CE0000000 %438 = fmul float %421, %421 %439 = fsub float %438, %432 %440 = fmul float %433, 3.000000e+00 %441 = fadd float %440, -1.000000e+00 %442 = fmul float %434, 0x3FEB756F20000000 %443 = fmul float %430, 0xBFEB756F20000000 %444 = fmul float %441, 0x3FCFB4E7C0000000 %445 = fmul float %431, 0xBFEB756F20000000 %446 = fmul float %439, 0x3FDB756F20000000 %447 = fmul float %42, %443 %448 = fmul float %43, %444 %449 = fadd float %447, %448 %450 = fmul float %44, %445 %451 = fadd float %449, %450 %452 = fmul float %45, %446 %453 = fadd float %451, %452 %454 = fmul float %38, %435 %455 = fmul float %39, %436 %456 = fadd float %454, %455 %457 = fmul float %40, %437 %458 = fadd float %456, %457 %459 = fmul float %41, %442 %460 = fadd float %458, %459 %461 = fmul float %35, 0x3FEC5BFA00000000 %462 = fadd float %461, %460 %463 = fadd float %453, %462 %464 = fmul float %50, %443 %465 = fmul float %51, %444 %466 = fadd float %464, %465 %467 = fmul float %52, %445 %468 = fadd float %466, %467 %469 = fmul float %53, %446 %470 = fadd float %468, %469 %471 = fmul float %58, %443 %472 = fmul float %59, %444 %473 = fadd float %471, %472 %474 = fmul float %60, %445 %475 = fadd float %473, %474 %476 = fmul float %61, %446 %477 = fadd float %475, %476 %478 = fmul float %46, %435 %479 = fmul float %47, %436 %480 = fadd float %478, %479 %481 = fmul float %48, %437 %482 = fadd float %480, %481 %483 = fmul float %49, %442 %484 = fadd float %482, %483 %485 = fmul float %54, %435 %486 = fmul float %55, %436 %487 = fadd float %485, %486 %488 = fmul float %56, %437 %489 = fadd float %487, %488 %490 = fmul float %57, %442 %491 = fadd float %489, %490 %492 = fmul float %37, 0x3FEC5BFA00000000 %493 = fadd float %492, %491 %494 = fadd float %477, %493 %495 = fmul float %36, 0x3FEC5BFA00000000 %496 = fadd float %495, %484 %497 = fadd float %470, %496 %498 = fmul float %378, %463 %499 = fmul float %380, %497 %500 = fmul float %382, %494 %501 = call float @llvm.AMDGPU.cndlt(float %463, float 0.000000e+00, float %498) %502 = call float @llvm.AMDGPU.cndlt(float %497, float 0.000000e+00, float %499) %503 = call float @llvm.AMDGPU.cndlt(float %494, float 0.000000e+00, float %500) %504 = fadd float %283, %501 %505 = fadd float %284, %502 %506 = fadd float %285, %503 %507 = fmul float %148, %148 %508 = fmul float %149, %149 %509 = fadd float %508, %507 %510 = fmul float %150, %150 %511 = fadd float %509, %510 %512 = call float @llvm.AMDGPU.rsq.clamped.f32(float %511) %513 = call float @llvm.minnum.f32(float %512, float 0x47EFFFFFE0000000) %514 = fmul float %148, %513 %515 = fmul float %149, %513 %516 = fmul float %150, %513 %517 = fmul float %343, %514 %518 = fmul float %345, %515 %519 = fadd float %518, %517 %520 = fmul float %347, %516 %521 = fadd float %519, %520 %522 = call float @llvm.AMDIL.clamp.(float %521, float 0.000000e+00, float 1.000000e+00) %523 = fmul float %331, %514 %524 = fmul float %332, %515 %525 = fadd float %524, %523 %526 = fmul float %333, %516 %527 = fadd float %525, %526 %528 = call float @llvm.AMDIL.clamp.(float %527, float 0.000000e+00, float 1.000000e+00) %529 = fmul float %217, %86 %530 = fmul float %217, %86 %531 = fadd float %530, 8.000000e+00 %532 = call float @fabs(float %522) %533 = call float @llvm.pow.f32(float %532, float %529) %534 = fadd float %522, 0xBEB0C6F7A0000000 %535 = fmul float %531, %533 %536 = fmul float %535, 0x3FA45F3060000000 %537 = bitcast float %146 to i32 %538 = bitcast float %147 to i32 %539 = insertelement <2 x i32> undef, i32 %537, i32 0 %540 = insertelement <2 x i32> %539, i32 %538, i32 1 %541 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %540, <32 x i8> %135, <16 x i8> %138, i32 2) %542 = extractelement <4 x float> %541, i32 0 %543 = extractelement <4 x float> %541, i32 1 %544 = extractelement <4 x float> %541, i32 2 %545 = fmul float %542, %85 %546 = fmul float %543, %85 %547 = fmul float %544, %85 %548 = fmul float %545, %83 %549 = fmul float %546, %83 %550 = fmul float %547, %83 %551 = fmul float %548, %34 %552 = fadd float %551, %31 %553 = fmul float %549, %34 %554 = fadd float %553, %32 %555 = fmul float %550, %34 %556 = fadd float %555, %33 %557 = fmul float %552, %536 %558 = fmul float %554, %536 %559 = fmul float %556, %536 %560 = call float @llvm.AMDGPU.cndlt(float %534, float 0.000000e+00, float %557) %561 = call float @llvm.AMDGPU.cndlt(float %534, float 0.000000e+00, float %558) %562 = call float @llvm.AMDGPU.cndlt(float %534, float 0.000000e+00, float %559) %563 = fmul float %528, %378 %564 = fmul float %528, %380 %565 = fmul float %528, %382 %566 = fadd float %528, 0xBEB0C6F7A0000000 %567 = call float @llvm.AMDGPU.cndlt(float %566, float 0.000000e+00, float %563) %568 = call float @llvm.AMDGPU.cndlt(float %566, float 0.000000e+00, float %564) %569 = call float @llvm.AMDGPU.cndlt(float %566, float 0.000000e+00, float %565) %570 = fadd float %560, %567 %571 = fadd float %561, %568 %572 = fadd float %562, %569 %573 = fmul float %570, %87 %574 = fadd float %573, %504 %575 = fmul float %571, %88 %576 = fadd float %575, %505 %577 = fmul float %572, %89 %578 = fadd float %577, %506 %579 = fmul float %378, %90 %580 = fadd float %579, %574 %581 = fmul float %380, %91 %582 = fadd float %581, %576 %583 = fmul float %382, %92 %584 = fadd float %583, %578 %585 = fmul float %151, 3.906250e-03 %586 = call i32 @llvm.SI.packf16(float %580, float %582) %587 = bitcast i32 %586 to float %588 = call i32 @llvm.SI.packf16(float %584, float %585) %589 = bitcast i32 %588 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %587, float %589, float %587, float %589) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s9, s[0:3], 0x38 ; C2048138 s_buffer_load_dword s10, s[0:3], 0x39 ; C2050139 s_buffer_load_dword s8, s[0:3], 0x3a ; C204013A s_buffer_load_dword s14, s[0:3], 0x3c ; C207013C s_buffer_load_dword s12, s[0:3], 0x3d ; C206013D s_buffer_load_dword s13, s[0:3], 0x3e ; C206813E s_buffer_load_dword s18, s[0:3], 0x40 ; C2090140 s_buffer_load_dword s17, s[0:3], 0x41 ; C2088141 s_buffer_load_dword s16, s[0:3], 0x42 ; C2080142 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_buffer_load_dword s19, s[0:3], 0x43 ; C2098143 s_buffer_load_dword s15, s[0:3], 0x55 ; C2078155 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 s_buffer_load_dword s11, s[0:3], 0x56 ; C2058156 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 s_buffer_load_dword s52, s[0:3], 0x34 ; C21A0134 s_buffer_load_dword s53, s[0:3], 0x35 ; C21A8135 s_buffer_load_dword s54, s[0:3], 0x4d ; C21B014D s_buffer_load_dword s55, s[0:3], 0x52 ; C21B8152 s_buffer_load_dword s56, s[0:3], 0x54 ; C21C0154 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00 v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01 v_interp_p1_f32 v2, v0, 3, 4, [m0] ; C8081300 v_interp_p2_f32 v2, [v2], v1, 3, 4, [m0] ; C8091301 v_interp_p1_f32 v15, v0, 0, 5, [m0] ; C83C1400 s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx4 s[28:31], s[4:5], 0x8 ; C08E0508 s_load_dwordx8 s[60:67], s[6:7], 0x10 ; C0DE0710 s_load_dwordx8 s[44:51], s[6:7], 0x8 ; C0D60708 v_interp_p2_f32 v15, [v15], v1, 0, 5, [m0] ; C83D1401 v_interp_p1_f32 v16, v0, 1, 5, [m0] ; C8401500 v_interp_p2_f32 v16, [v16], v1, 1, 5, [m0] ; C8411501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[36:43], s[32:35] ; F0800200 0109010A s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[60:67], s[28:31] ; F0800700 00EF110A image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[44:51], s[24:27] ; F0800700 00CB140A v_mul_f32_e32 v23, v9, v10 ; 102E1509 v_mad_f32 v23, 0.5, v23, 0.5 ; D2820017 03C22EF0 v_mad_f32 v24, 0, s54, 1.0 ; D2820018 03C86C80 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_add_f32_e64 v24, 0, s54 ; D2060018 00006C80 v_mad_f32 v25, 0, s54, 0.5 ; D2820019 03C06C80 s_load_dwordx4 s[44:47], s[4:5], 0x18 ; C0960518 s_load_dwordx8 s[60:67], s[6:7], 0x30 ; C0DE0730 v_mad_f32 v26, v24, v23, s52 ; D282001A 00D22F18 v_mul_f32_e32 v23, v9, v11 ; 102E1709 v_mad_f32 v23, 0.5, v23, 0.5 ; D2820017 03C22EF0 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mad_f32 v27, v25, v23, s53 ; D282001B 00D62F19 s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 image_sample v23, 1, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[36:43], s[32:35] ; F0800100 0109171A s_load_dwordx4 s[24:27], s[4:5], 0x1c ; C08C051C s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 s_load_dwordx8 s[28:35], s[6:7], 0x38 ; C0CE0738 s_waitcnt vmcnt(1) ; BF8C0771 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[60:67], s[44:47] ; F0800700 016F180A s_waitcnt vmcnt(1) ; BF8C0771 v_subrev_f32_e32 v23, s55, v23 ; 0A2E2E37 v_add_f32_e64 v23, 0, v23 clamp ; D2060817 00022E80 v_mul_f32_e32 v23, s56, v23 ; 102E2E38 v_add_f32_e64 v23, 0, v23 clamp ; D2060817 00022E80 v_sub_f32_e32 v27, 1.0, v23 ; 08362EF2 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v24, v24, v27 ; 10303718 v_mul_f32_e32 v25, v25, v27 ; 10323719 v_mul_f32_e32 v26, v26, v27 ; 1034371A image_sample v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[36:43], s[20:23] ; F0800700 00A91B0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, 2.0, v27, -1.0 ; D282001B 03CE36F4 v_mad_f32 v28, 2.0, v28, -1.0 ; D282001C 03CE38F4 v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4 v_mul_f32_e32 v30, v27, v27 ; 103C371B v_mad_f32 v30, v28, v28, v30 ; D282001E 047A391C v_mad_f32 v30, v29, v29, v30 ; D282001E 047A3B1D v_rsq_clamp_f32_e32 v30, v30 ; 7E3C591E v_mul_f32_e32 v31, v15, v15 ; 103E1F0F v_mad_f32 v31, v16, v16, v31 ; D282001F 047E2110 v_mad_f32 v31, v0, v0, v31 ; D282001F 047E0100 v_rsq_clamp_f32_e32 v31, v31 ; 7E3E591F v_min_f32_e32 v30, 0x7f7fffff, v30 ; 1E3C3CFF 7F7FFFFF v_mul_f32_e32 v27, v30, v27 ; 1036371E v_mul_f32_e32 v28, v30, v28 ; 1038391E v_mul_f32_e32 v29, v30, v29 ; 103A3B1E v_min_f32_e32 v30, 0x7f7fffff, v31 ; 1E3C3EFF 7F7FFFFF v_mul_f32_e32 v31, v30, v15 ; 103E1F1E v_mul_f32_e32 v31, v31, v27 ; 103E371F v_mul_f32_e32 v32, v30, v16 ; 1040211E v_mad_f32 v31, v28, v32, v31 ; D282001F 047E411C v_mul_f32_e32 v32, v30, v0 ; 1040011E v_mad_f32 v31, v29, v32, v31 ; D282001F 047E411D s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514 s_load_dwordx8 s[36:43], s[6:7], 0x28 ; C0D20728 v_mul_f32_e32 v32, v27, v31 ; 10403F1B v_mad_f32 v32, v31, v27, v32 ; D2820020 0482371F v_mad_f32 v32, -v15, v30, v32 ; D2820020 24823D0F v_mul_f32_e32 v15, v28, v31 ; 101E3F1C v_mad_f32 v15, v31, v28, v15 ; D282000F 043E391F v_mad_f32 v33, -v16, v30, v15 ; D2820021 243E3D10 v_mov_b32_e32 v15, s19 ; 7E1E0213 v_mul_f32_e32 v15, s18, v15 ; 101E1E12 v_mov_b32_e32 v16, s19 ; 7E200213 v_mul_f32_e32 v16, s17, v16 ; 10202011 v_mov_b32_e32 v34, s19 ; 7E440213 v_mul_f32_e32 v34, s16, v34 ; 10444410 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[32:33], s[36:43], s[20:23] ; F0800700 00A92320 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, v35, v15 ; 101E1F23 v_mul_f32_e32 v16, v36, v16 ; 10202124 v_mul_f32_e32 v34, v37, v34 ; 10444525 v_mul_f32_e32 v35, 0x3e99999a, v17 ; 104622FF 3E99999A v_madmk_f32_e32 v35, v17, v35, 0x3f170a3d ; 40464711 3F170A3D v_madmk_f32_e32 v35, v17, v35, 0x3de147ae ; 40464711 3DE147AE v_mad_f32 v24, v23, v17, v24 ; D2820018 04622317 v_mad_f32 v25, v23, v18, v25 ; D2820019 04662517 v_mad_f32 v26, v23, v19, v26 ; D282001A 046A2717 v_mul_f32_e32 v17, v22, v17 ; 10222316 v_mul_f32_e32 v18, v22, v18 ; 10242516 v_mul_f32_e32 v19, v22, v19 ; 10262716 v_mul_f32_e32 v36, v12, v12 ; 1048190C v_mad_f32 v36, v13, v13, v36 ; D2820024 04921B0D v_mad_f32 v36, v14, v14, v36 ; D2820024 04921D0E v_rsq_clamp_f32_e32 v36, v36 ; 7E485924 v_mul_f32_e32 v37, v29, v31 ; 104A3F1D v_mad_f32 v31, v31, v29, v37 ; D282001F 04963B1F v_mad_f32 v0, -v0, v30, v31 ; D2820000 247E3D00 v_min_f32_e32 v30, 0x7f7fffff, v36 ; 1E3C48FF 7F7FFFFF v_mul_f32_e32 v12, v30, v12 ; 1018191E v_mul_f32_e32 v13, v30, v13 ; 101A1B1E v_mul_f32_e32 v31, v12, v32 ; 103E410C v_mad_f32 v31, v33, v13, v31 ; D282001F 047E1B21 v_sub_f32_e32 v32, 1.0, v35 ; 084046F2 v_mov_b32_e32 v33, 0x7fffffff ; 7E4202FF 7FFFFFFF v_and_b32_e32 v35, v32, v33 ; 36464320 v_log_f32_e32 v35, v35 ; 7E464F23 s_buffer_load_dword s4, s[0:3], 0x57 ; C2020157 s_buffer_load_dword s5, s[0:3], 0x58 ; C2028158 s_buffer_load_dword s6, s[0:3], 0x59 ; C2030159 v_mul_legacy_f32_e32 v35, s15, v35 ; 0E46460F v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_exp_f32_e32 v35, v35 ; 7E464B23 v_mul_f32_e32 v1, v35, v1 ; 10020323 v_mov_b32_e32 v35, 0x358637bd ; 7E4602FF 358637BD v_max_f32_e64 v36, |s14|, v35 ; D2200124 0002460E v_max_f32_e64 v37, |s12|, v35 ; D2200125 0002460C v_max_f32_e64 v35, |s13|, v35 ; D2200123 0002460D v_log_f32_e64 v36, |v36| ; D34E0124 00000124 v_log_f32_e64 v37, |v37| ; D34E0125 00000125 v_log_f32_e64 v35, |v35| ; D34E0123 00000123 v_mov_b32_e32 v38, 0xff7fffff ; 7E4C02FF FF7FFFFF v_max_f32_e32 v36, v38, v36 ; 20484926 v_max_f32_e32 v37, v38, v37 ; 204A4B26 v_max_f32_e32 v35, v38, v35 ; 20464726 v_mad_f32 v15, v20, v15, v24 ; D282000F 04621F14 v_mad_f32 v16, v20, v16, v25 ; D2820010 04662114 v_mov_b32_e32 v24, 0xb58637bd ; 7E3002FF B58637BD v_add_f32_e64 v25, |v32|, v24 ; D2060119 00023120 v_cmp_gt_f32_e32 vcc, 0, v25 ; 7C083280 v_mad_f32 v20, v20, v34, v26 ; D2820014 046A4514 v_mul_f32_e32 v25, s9, v1 ; 10320209 v_mul_f32_e32 v26, s10, v1 ; 1034020A v_mul_f32_e32 v1, s8, v1 ; 10020208 v_cndmask_b32_e64 v25, v25, 0, vcc ; D2000019 01A90119 v_cndmask_b32_e64 v26, v26, 0, vcc ; D200001A 01A9011A v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_mul_f32_e32 v32, s11, v36 ; 1040480B v_mul_f32_e32 v34, s11, v37 ; 10444A0B v_mul_f32_e32 v35, s11, v35 ; 1046460B v_exp_f32_e32 v32, v32 ; 7E404B20 v_exp_f32_e32 v34, v34 ; 7E444B22 v_exp_f32_e32 v35, v35 ; 7E464B23 v_mad_f32 v32, v22, v32, -v25 ; D2820020 84664116 s_buffer_load_dword s7, s[0:3], 0x5c ; C203815C v_mad_f32 v34, v22, v34, -v26 ; D2820022 846A4516 v_mad_f32 v22, v22, v35, -v1 ; D2820016 84064716 v_mul_f32_e32 v35, v6, v6 ; 10460D06 v_mad_f32 v35, v7, v7, v35 ; D2820023 048E0F07 v_mad_f32 v35, v8, v8, v35 ; D2820023 048E1108 v_rsq_clamp_f32_e32 v35, v35 ; 7E465923 s_buffer_load_dword s8, s[0:3], 0x60 ; C2040160 s_buffer_load_dword s9, s[0:3], 0x61 ; C2048161 v_mov_b32_e32 v36, 0x41000000 ; 7E4802FF 41000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v36, s7, v21, v36 ; D2820024 04922A07 v_mul_f32_e32 v21, s7, v21 ; 102A2A07 v_min_f32_e32 v35, 0x7f7fffff, v35 ; 1E4646FF 7F7FFFFF v_mul_f32_e32 v37, v3, v3 ; 104A0703 v_mad_f32 v37, v4, v4, v37 ; D2820025 04960904 v_mad_f32 v37, v5, v5, v37 ; D2820025 04960B05 v_rsq_clamp_f32_e32 v37, v37 ; 7E4A5925 v_mul_f32_e32 v6, v35, v6 ; 100C0D23 v_mul_f32_e32 v7, v35, v7 ; 100E0F23 v_mul_f32_e32 v8, v35, v8 ; 10101123 v_min_f32_e32 v35, 0x7f7fffff, v37 ; 1E464AFF 7F7FFFFF v_mul_f32_e32 v3, v35, v3 ; 10060723 v_mul_f32_e32 v4, v35, v4 ; 10080923 v_mul_f32_e32 v5, v35, v5 ; 100A0B23 v_mul_f32_e32 v35, v4, v8 ; 10461104 v_mad_f32 v35, v7, v5, -v35 ; D2820023 848E0B07 v_mul_f32_e32 v37, v27, v6 ; 104A0D1B v_mad_f32 v37, v7, v28, v37 ; D2820025 04963907 v_mul_f32_e32 v7, v3, v7 ; 100E0F03 v_mad_f32 v7, v6, v4, -v7 ; D2820007 841E0906 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mad_f32 v6, v8, v3, -v6 ; D2820006 841A0708 v_mul_f32_e32 v3, v27, v3 ; 1006071B s_buffer_load_dword s7, s[0:3], 0x5b ; C203815B v_mad_f32 v3, v4, v28, v3 ; D2820003 040E3904 v_mul_f32_e32 v4, v9, v35 ; 10084709 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v4, v27, v4 ; 1008091B v_mad_f32 v4, v6, v28, v4 ; D2820004 04123906 v_mul_f32_e32 v6, v12, v27 ; 100C370C v_mad_f32 v6, v28, v13, v6 ; D2820006 041A1B1C s_buffer_load_dword s10, s[0:3], 0x49 ; C2050149 image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[28:35], s[24:27] ; F0800700 00C70A0A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v10, s7, v10 ; 10141407 v_mul_f32_e32 v11, s7, v11 ; 10161607 v_mul_f32_e32 v12, s7, v12 ; 10181807 v_add_f32_e32 v13, v25, v25 ; 061A3319 v_mad_f32 v13, s4, v32, v13 ; D282000D 04364004 v_add_f32_e32 v25, v26, v26 ; 0632351A v_mad_f32 v25, s4, v34, v25 ; D2820019 04664404 v_add_f32_e32 v1, v1, v1 ; 06020301 v_mad_f32 v1, s4, v22, v1 ; D2820001 04062C04 s_buffer_load_dword s4, s[0:3], 0x47 ; C2020147 v_mul_f32_e32 v17, s10, v17 ; 1022220A v_mul_f32_e32 v18, s10, v18 ; 1024240A s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 v_mul_f32_e32 v19, s10, v19 ; 1026260A v_mul_f32_e32 v17, v17, v23 ; 10222F11 s_buffer_load_dword s10, s[0:3], 0x1 ; C2050101 v_mul_f32_e32 v18, v18, v23 ; 10242F12 v_mul_f32_e32 v19, v19, v23 ; 10262F13 s_buffer_load_dword s11, s[0:3], 0x2 ; C2058102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, -v17, s4, v13 ; D282000D 24340911 s_buffer_load_dword s12, s[0:3], 0x5a ; C206015A s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111 v_mov_b32_e32 v22, s7 ; 7E2C0207 v_mad_f32 v17, s4, v17, v22 ; D2820011 045A2204 v_mad_f32 v13, s5, v13, v17 ; D282000D 04461A05 v_mad_f32 v17, -v18, s4, v25 ; D2820011 24640912 v_mov_b32_e32 v22, s10 ; 7E2C020A v_mad_f32 v18, s4, v18, v22 ; D2820012 045A2404 v_mad_f32 v17, s5, v17, v18 ; D2820011 044A2205 v_mad_f32 v1, -v19, s4, v1 ; D2820001 24040913 v_mov_b32_e32 v18, s11 ; 7E24020B v_mad_f32 v18, s4, v19, v18 ; D2820012 044A2604 v_mad_f32 v1, s5, v1, v18 ; D2820001 044A0205 v_mul_f32_e32 v15, s6, v15 ; 101E1E06 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v15, s12, v15 ; 101E1E0C v_sub_f32_e64 v18, 1.0, s7 ; D2080012 00000EF2 v_mul_f32_e32 v15, v18, v15 ; 101E1F12 v_mul_f32_e32 v16, s6, v16 ; 10202006 v_mul_f32_e32 v16, s12, v16 ; 1020200C s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s5, s[0:3], 0x13 ; C2028113 v_sub_f32_e64 v18, 1.0, s10 ; D2080012 000014F2 v_mul_f32_e32 v16, v18, v16 ; 10202112 v_mul_f32_e32 v18, s6, v20 ; 10242806 v_mul_f32_e32 v18, s12, v18 ; 1024240C v_sub_f32_e64 v19, 1.0, s11 ; D2080013 000016F2 v_mul_f32_e32 v18, v19, v18 ; 10242513 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s10, s[0:3], 0x15 ; C2050115 s_buffer_load_dword s11, s[0:3], 0x16 ; C2058116 v_mov_b32_e32 v19, s13 ; 7E26020D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v15, s5, v15, v19 ; D282000F 044E1E05 v_mov_b32_e32 v19, s14 ; 7E26020E v_mad_f32 v16, s5, v16, v19 ; D2820010 044E2005 v_mov_b32_e32 v19, s4 ; 7E260204 v_mad_f32 v18, s5, v18, v19 ; D2820012 044E2405 v_mad_f32 v8, v8, v29, v37 ; D2820008 04963B08 v_mad_f32 v3, v5, v29, v3 ; D2820003 040E3B05 v_mul_f32_e32 v5, v9, v7 ; 100A0F09 v_mad_f32 v4, v5, v29, v4 ; D2820004 04123B05 v_mul_f32_e32 v5, v30, v14 ; 100A1D1E v_mad_f32 v0, v0, v5, v31 ; D2820000 047E0B00 v_mad_f32 v5, v29, v5, v6 ; D2820005 041A0B1D v_mul_f32_e32 v6, v8, v8 ; 100C1108 v_mov_b32_e32 v7, 0x40400000 ; 7E0E02FF 40400000 v_mad_f32 v6, v7, v6, -1.0 ; D2820006 03CE0D07 v_mul_f32_e32 v7, v8, v4 ; 100E0908 v_mul_f32_e32 v9, v4, v4 ; 10120904 v_mul_f32_e32 v14, v4, v3 ; 101C0704 v_mov_b32_e32 v19, 0xbf82fc67 ; 7E2602FF BF82FC67 v_mul_f32_e32 v4, v19, v4 ; 10080913 v_mul_f32_e32 v19, v19, v3 ; 10260713 v_mad_f32 v9, v3, v3, -v9 ; D2820009 84260703 v_mul_f32_e32 v3, v8, v3 ; 10060708 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 v_mov_b32_e32 v20, 0xbf5bab79 ; 7E2802FF BF5BAB79 v_mul_f32_e32 v7, v20, v7 ; 100E0F14 v_mul_f32_e32 v3, v20, v3 ; 10060714 s_buffer_load_dword s12, s[0:3], 0x29 ; C2060129 s_buffer_load_dword s13, s[0:3], 0x28 ; C2068128 v_mul_f32_e32 v6, 0x3e7da73e, v6 ; 100C0CFF 3E7DA73E s_buffer_load_dword s14, s[0:3], 0x30 ; C2070130 s_buffer_load_dword s15, s[0:3], 0x31 ; C2078131 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s4, v6 ; 10280C04 v_mad_f32 v20, s5, v7, v20 ; D2820014 04520E05 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A s_buffer_load_dword s16, s[0:3], 0x2b ; C208012B v_mul_f32_e32 v22, s12, v6 ; 102C0C0C v_mad_f32 v22, s13, v7, v22 ; D2820016 045A0E0D s_buffer_load_dword s12, s[0:3], 0x32 ; C2060132 s_buffer_load_dword s13, s[0:3], 0x33 ; C2068133 v_mul_f32_e32 v6, s15, v6 ; 100C0C0F v_mad_f32 v6, s14, v7, v6 ; D2820006 041A0E0E s_buffer_load_dword s14, s[0:3], 0x23 ; C2070123 s_buffer_load_dword s15, s[0:3], 0x24 ; C2078124 s_buffer_load_dword s17, s[0:3], 0x25 ; C2088125 s_buffer_load_dword s18, s[0:3], 0x26 ; C2090126 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s4, v3, v20 ; D2820007 04520604 v_mad_f32 v20, s5, v3, v22 ; D2820014 045A0605 v_mad_f32 v3, s12, v3, v6 ; D2820003 041A060C v_mul_f32_e32 v6, 0x3edbab79, v9 ; 100C12FF 3EDBAB79 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C v_mad_f32 v7, s14, v6, v7 ; D2820007 041E0C0E v_mad_f32 v9, s16, v6, v20 ; D2820009 04520C10 v_mad_f32 v3, s13, v6, v3 ; D2820003 040E0C0D v_mul_f32_e32 v6, 0x3f82fc67, v8 ; 100C10FF 3F82FC67 s_buffer_load_dword s12, s[0:3], 0x2d ; C206012D s_buffer_load_dword s13, s[0:3], 0x2c ; C206812C s_buffer_load_dword s14, s[0:3], 0x1e ; C207011E s_buffer_load_dword s16, s[0:3], 0x1f ; C208011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s4, v6 ; 10100C04 v_mad_f32 v8, s5, v4, v8 ; D2820008 04220805 v_mul_f32_e32 v20, s17, v6 ; 10280C11 v_mad_f32 v20, s15, v4, v20 ; D2820014 0452080F s_buffer_load_dword s4, s[0:3], 0x2e ; C202012E s_buffer_load_dword s5, s[0:3], 0x2f ; C202812F v_mul_f32_e32 v6, s12, v6 ; 100C0C0C v_mad_f32 v4, s13, v4, v6 ; D2820004 041A080D s_buffer_load_dword s12, s[0:3], 0x27 ; C2060127 v_mad_f32 v6, s14, v19, v8 ; D2820006 0422260E v_mad_f32 v8, s18, v19, v20 ; D2820008 04522612 s_buffer_load_dword s13, s[0:3], 0x18 ; C2068118 s_buffer_load_dword s14, s[0:3], 0x1a ; C207011A s_buffer_load_dword s15, s[0:3], 0x19 ; C2078119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s4, v19, v4 ; D2820004 04122604 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 v_mul_f32_e32 v14, 0x3f5bab79, v14 ; 101C1CFF 3F5BAB79 v_mad_f32 v6, s16, v14, v6 ; D2820006 041A1C10 v_mad_f32 v8, s12, v14, v8 ; D2820008 04221C0C v_mad_f32 v4, s5, v14, v4 ; D2820004 04121C05 v_mov_b32_e32 v14, 0x3f62dfd0 ; 7E1C02FF 3F62DFD0 v_mad_f32 v6, s13, v14, v6 ; D2820006 041A1C0D v_mad_f32 v4, s14, v14, v4 ; D2820004 04121C0E v_mad_f32 v8, s15, v14, v8 ; D2820008 04221C0F v_mul_f32_e32 v10, s6, v10 ; 10141406 v_mov_b32_e32 v14, s7 ; 7E1C0207 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, s4, v10, v14 ; D282000A 043A1404 v_mul_f32_e32 v11, s6, v11 ; 10161606 v_mov_b32_e32 v14, s10 ; 7E1C020A v_mad_f32 v11, s4, v11, v14 ; D282000B 043A1604 v_mul_f32_e32 v12, s6, v12 ; 10181806 v_mov_b32_e32 v14, s11 ; 7E1C020B v_mad_f32 v12, s4, v12, v14 ; D282000C 043A1804 s_buffer_load_dword s4, s[0:3], 0x62 ; C2020162 s_buffer_load_dword s5, s[0:3], 0x64 ; C2028164 s_buffer_load_dword s6, s[0:3], 0x65 ; C2030165 s_buffer_load_dword s0, s[0:3], 0x66 ; C2000166 v_add_f32_e32 v6, v6, v7 ; 060C0F06 v_mul_f32_e32 v7, v6, v15 ; 100E1F06 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v6, v7, 0, vcc ; D2000006 01A90107 v_add_f32_e32 v7, v8, v9 ; 060E1308 v_mul_f32_e32 v8, v7, v16 ; 10102107 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v7, v8, 0, vcc ; D2000007 01A90108 v_add_f32_e32 v3, v4, v3 ; 06060704 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_and_b32_e32 v4, v0, v33 ; 36084300 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_f32_e32 v8, v3, v18 ; 10102503 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v3, v8, 0, vcc ; D2000003 01A90108 v_mul_legacy_f32_e32 v4, v21, v4 ; 0E080915 v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v4, v4, v36 ; 10084904 v_mul_f32_e32 v4, 0x3d22f983, v4 ; 100808FF 3D22F983 v_mul_f32_e32 v8, v4, v10 ; 10101504 v_mul_f32_e32 v9, v4, v11 ; 10121704 v_mul_f32_e32 v4, v4, v12 ; 10081904 v_add_f32_e32 v0, v24, v0 ; 06000118 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v8, 0, vcc ; D2000000 01A90108 v_cndmask_b32_e64 v8, v9, 0, vcc ; D2000008 01A90109 v_cndmask_b32_e64 v4, v4, 0, vcc ; D2000004 01A90104 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_add_f32_e32 v9, v24, v5 ; 06120B18 v_mul_f32_e32 v10, v15, v5 ; 10140B0F v_mul_f32_e32 v11, v16, v5 ; 10160B10 v_mul_f32_e32 v5, v18, v5 ; 100A0B12 v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v9, v10, 0, vcc ; D2000009 01A9010A v_cndmask_b32_e64 v10, v11, 0, vcc ; D200000A 01A9010B v_cndmask_b32_e64 v5, v5, 0, vcc ; D2000005 01A90105 v_add_f32_e32 v6, v6, v13 ; 060C1B06 v_add_f32_e32 v7, v7, v17 ; 060E2307 v_add_f32_e32 v1, v3, v1 ; 06020303 v_add_f32_e32 v0, v9, v0 ; 06000109 v_add_f32_e32 v3, v10, v8 ; 0606110A v_add_f32_e32 v4, v5, v4 ; 06080905 v_mad_f32 v0, v0, s8, v6 ; D2820000 04181100 v_mad_f32 v3, v3, s9, v7 ; D2820003 041C1303 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 v_mad_f32 v0, v15, s5, v0 ; D2820000 04000B0F v_mad_f32 v3, v16, s6, v3 ; D2820003 040C0D10 v_mad_f32 v1, v18, s0, v1 ; D2820001 04040112 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 v_mul_f32_e32 v2, 0x3b800000, v2 ; 100404FF 3B800000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 40 Code Size: 2464 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], TEXCOORD[0] DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], CONST[1], IN[0].yyyy 1: MAD TEMP[0], CONST[0], IN[0].xxxx, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], TEMP[0] 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %17, %34 %38 = fmul float %18, %34 %39 = fmul float %19, %34 %40 = fmul float %20, %34 %41 = fmul float %13, %33 %42 = fadd float %41, %37 %43 = fmul float %14, %33 %44 = fadd float %43, %38 %45 = fmul float %15, %33 %46 = fadd float %45, %39 %47 = fmul float %16, %33 %48 = fadd float %47, %40 %49 = fmul float %21, %35 %50 = fadd float %49, %42 %51 = fmul float %22, %35 %52 = fadd float %51, %44 %53 = fmul float %23, %35 %54 = fadd float %53, %46 %55 = fmul float %24, %35 %56 = fadd float %55, %48 %57 = fmul float %25, %36 %58 = fadd float %57, %50 %59 = fmul float %26, %36 %60 = fadd float %59, %52 %61 = fmul float %27, %36 %62 = fadd float %61, %54 %63 = fmul float %28, %36 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %58, float %60, float %62, float %64) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s8, v1 ; 10080208 v_mad_f32 v4, s4, v0, v4 ; D2820004 04120004 v_mul_f32_e32 v5, s9, v1 ; 100A0209 v_mad_f32 v5, s5, v0, v5 ; D2820005 04160005 v_mul_f32_e32 v6, s10, v1 ; 100C020A v_mad_f32 v6, s6, v0, v6 ; D2820006 041A0006 v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mad_f32 v0, s7, v0, v1 ; D2820000 04060007 v_mad_f32 v1, s12, v2, v4 ; D2820001 0412040C v_mad_f32 v4, s13, v2, v5 ; D2820004 0416040D v_mad_f32 v5, s14, v2, v6 ; D2820005 041A040E v_mad_f32 v0, s15, v2, v0 ; D2820000 0402040F v_mad_f32 v1, s16, v3, v1 ; D2820001 04060610 v_mad_f32 v2, s17, v3, v4 ; D2820002 04120611 v_mad_f32 v4, s18, v3, v5 ; D2820004 04160612 v_mad_f32 v0, s0, v3, v0 ; D2820000 04020600 exp 15, 32, 0, 0, 0, v1, v2, v4, v0 ; F800020F 00040201 exp 15, 12, 0, 1, 0, v1, v2, v4, v0 ; F80008CF 00040201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 228 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..16] DCL TEMP[0..8] DCL TEMP[9], LOCAL IMM[0] FLT32 { 256.0000, -1.0000, 0.5000, 1.5000} IMM[1] FLT32 { 0.2500, 0.0000, -0.0000, 340282346638528859811704183484516925440.0000} IMM[2] FLT32 { -0.9990, 0.5000, 2.5000, 1.5000} 0: RCP TEMP[0].x, IN[0].wwww 1: MUL TEMP[0].xy, TEMP[0].xxxx, IN[0] 2: MAD TEMP[0].zw, TEMP[0].xyxy, CONST[1].xyxy, CONST[1].xywz 3: TEX TEMP[1], TEMP[0].zwzw, SAMP[0], 2D 4: MUL TEMP[0].z, TEMP[1].wwww, IMM[0].xxxx 5: MUL TEMP[0].xy, TEMP[0].zzzz, TEMP[0] 6: MUL TEMP[1], TEMP[0].yyyy, CONST[11] 7: MAD TEMP[1], CONST[10], TEMP[0].xxxx, TEMP[1] 8: MAD TEMP[1], CONST[12], TEMP[0].zzzz, TEMP[1] 9: ADD TEMP[1], TEMP[1], CONST[13] 10: RCP TEMP[0].w, TEMP[1].wwww 11: MUL TEMP[1].xy, TEMP[0].wwww, TEMP[1] 12: MAX TEMP[0].w, IMM[2].xxxx, -TEMP[1].zzzz 13: MUL TEMP[1].zw, TEMP[1].xyxy, CONST[16].xyxy 14: MOV TEMP[2].y, IMM[0].yyyy 15: MAD TEMP[1].xy, TEMP[1], CONST[16], TEMP[2].yyyy 16: FRC TEMP[1].zw, TEMP[1] 17: ADD TEMP[1].xy, -TEMP[1].zwzw, TEMP[1] 18: ADD TEMP[2].xz, TEMP[1].xyyw, IMM[2].zzzz 19: RCP TEMP[3].x, CONST[16].xxxx 20: RCP TEMP[3].y, CONST[16].yyyy 21: MUL TEMP[2].xz, TEMP[2], TEMP[3].xyyw 22: TEX TEMP[4], TEMP[2].xzzw, SAMP[1], 2D 23: ADD TEMP[2].x, TEMP[0].wwww, TEMP[4].xxxx 24: MAD_SAT TEMP[4].z, TEMP[2].xxxx, CONST[16].zzzz, -TEMP[2].yyyy 25: ADD TEMP[5], TEMP[1].xyxy, IMM[2].zyzw 26: MUL TEMP[5], TEMP[3].xyxy, TEMP[5] 27: TEX TEMP[6], TEMP[5], SAMP[1], 2D 28: TEX TEMP[5], TEMP[5].zwzw, SAMP[1], 2D 29: ADD TEMP[2].x, TEMP[0].wwww, TEMP[5].xxxx 30: MAD_SAT TEMP[4].y, TEMP[2].xxxx, CONST[16].zzzz, -TEMP[2].yyyy 31: ADD TEMP[2].x, TEMP[0].wwww, TEMP[6].xxxx 32: MAD_SAT TEMP[4].x, TEMP[2].xxxx, CONST[16].zzzz, -TEMP[2].yyyy 33: ADD TEMP[5], TEMP[1].xyxy, IMM[2].yzwy 34: MUL TEMP[5], TEMP[3].xyxy, TEMP[5] 35: TEX TEMP[6], TEMP[5].zwzw, SAMP[1], 2D 36: TEX TEMP[5], TEMP[5], SAMP[1], 2D 37: ADD TEMP[2].x, TEMP[0].wwww, TEMP[5].xxxx 38: MAD_SAT TEMP[5].z, TEMP[2].xxxx, CONST[16].zzzz, -TEMP[2].yyyy 39: ADD TEMP[2].x, TEMP[0].wwww, TEMP[6].xxxx 40: MAD_SAT TEMP[6].x, TEMP[2].xxxx, CONST[16].zzzz, -TEMP[2].yyyy 41: ADD TEMP[7], TEMP[1].xyxy, IMM[2].wwwz 42: ADD TEMP[8], TEMP[1].xyxy, IMM[0].zzzw 43: MUL TEMP[8], TEMP[3].xyxy, TEMP[8] 44: MUL TEMP[3], TEMP[3].xyxy, TEMP[7] 45: TEX TEMP[7], TEMP[3], SAMP[1], 2D 46: TEX TEMP[3], TEMP[3].zwzw, SAMP[1], 2D 47: ADD TEMP[1].x, TEMP[0].wwww, TEMP[3].xxxx 48: MAD_SAT TEMP[6].z, TEMP[1].xxxx, CONST[16].zzzz, -TEMP[2].yyyy 49: ADD TEMP[1].x, TEMP[0].wwww, TEMP[7].xxxx 50: MAD_SAT TEMP[6].y, TEMP[1].xxxx, CONST[16].zzzz, -TEMP[2].yyyy 51: LRP TEMP[3], TEMP[1].zzzz, TEMP[4].xyyz, TEMP[6].xyyz 52: LRP TEMP[2].xz, TEMP[1].wwww, TEMP[3].yyww, TEMP[3] 53: TEX TEMP[3], TEMP[8], SAMP[1], 2D 54: TEX TEMP[4], TEMP[8].zwzw, SAMP[1], 2D 55: ADD TEMP[1].x, TEMP[0].wwww, TEMP[4].xxxx 56: ADD TEMP[0].w, TEMP[0].wwww, TEMP[3].xxxx 57: MAD_SAT TEMP[5].x, TEMP[0].wwww, CONST[16].zzzz, -TEMP[2].yyyy 58: MAD_SAT TEMP[5].y, TEMP[1].xxxx, CONST[16].zzzz, -TEMP[2].yyyy 59: LRP TEMP[3], TEMP[1].zzzz, TEMP[6].xyyz, TEMP[5].xyyz 60: LRP TEMP[4].xy, TEMP[1].wwww, TEMP[3].ywzw, TEMP[3].xzzw 61: ADD TEMP[0].w, TEMP[4].yyyy, TEMP[4].xxxx 62: ADD TEMP[0].w, TEMP[2].xxxx, TEMP[0].wwww 63: ADD TEMP[0].w, TEMP[2].zzzz, TEMP[0].wwww 64: MUL_SAT TEMP[0].w, TEMP[0].wwww, IMM[1].xxxx 65: MUL TEMP[0].w, TEMP[0].wwww, TEMP[0].wwww 66: MUL TEMP[1].xyz, TEMP[0].yyyy, CONST[7] 67: MAD TEMP[1].xyz, CONST[6], TEMP[0].xxxx, TEMP[1] 68: MAD TEMP[0].xyz, CONST[8], TEMP[0].zzzz, TEMP[1] 69: ADD TEMP[0].xyz, TEMP[0], CONST[9] 70: ADD TEMP[0].xyz, -TEMP[0], CONST[14] 71: MUL TEMP[0].xyz, TEMP[0], CONST[14].wwww 72: DP3 TEMP[0].x, TEMP[0], TEMP[0] 73: RSQ TEMP[9], |TEMP[0].xxxx| 74: MIN TEMP[0].x, IMM[1].wwww, TEMP[9] 75: RCP TEMP[0].x, TEMP[0].xxxx 76: MAD TEMP[0].x, TEMP[0].xxxx, CONST[15].yyyy, CONST[15].zzzz 77: MAX TEMP[1].x, TEMP[0].xxxx, IMM[1].yyyy 78: MAD TEMP[0].x, TEMP[1].xxxx, -TEMP[1].xxxx, -IMM[0].yyyy 79: MAX TEMP[1].x, TEMP[0].xxxx, IMM[1].yyyy 80: ADD TEMP[0].x, TEMP[1].xxxx, IMM[1].zzzz 81: POW TEMP[0].y, |TEMP[1].xxxx|, CONST[15].xxxx 82: ADD TEMP[1], TEMP[2].yyyy, CONST[0] 83: MAD TEMP[1], TEMP[0].yyyy, TEMP[1], -IMM[0].yyyy 84: CMP TEMP[1], TEMP[0].xxxx, -IMM[0].yyyy, TEMP[1] 85: ADD TEMP[2], -TEMP[1], -IMM[0].yyyy 86: MAD OUT[0], TEMP[0].wwww, TEMP[2], TEMP[1] 87: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 220) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %70 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %75 = load <8 x i32>, <8 x i32> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %77 = load <4 x i32>, <4 x i32> addrspace(2)* %76, align 16, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %81 = fdiv float 1.000000e+00, %80 %82 = fmul float %81, %78 %83 = fmul float %81, %79 %84 = fmul float %82, %28 %85 = fadd float %84, %31 %86 = fmul float %83, %29 %87 = fadd float %86, %30 %88 = bitcast float %85 to i32 %89 = bitcast float %87 to i32 %90 = insertelement <2 x i32> undef, i32 %88, i32 0 %91 = insertelement <2 x i32> %90, i32 %89, i32 1 %92 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %91, <32 x i8> %71, <16 x i8> %73, i32 2) %93 = extractelement <4 x float> %92, i32 3 %94 = fmul float %93, 2.560000e+02 %95 = fmul float %94, %82 %96 = fmul float %94, %83 %97 = fmul float %96, %48 %98 = fmul float %96, %49 %99 = fmul float %96, %50 %100 = fmul float %96, %51 %101 = fmul float %44, %95 %102 = fadd float %101, %97 %103 = fmul float %45, %95 %104 = fadd float %103, %98 %105 = fmul float %46, %95 %106 = fadd float %105, %99 %107 = fmul float %47, %95 %108 = fadd float %107, %100 %109 = fmul float %52, %94 %110 = fadd float %109, %102 %111 = fmul float %53, %94 %112 = fadd float %111, %104 %113 = fmul float %54, %94 %114 = fadd float %113, %106 %115 = fmul float %55, %94 %116 = fadd float %115, %108 %117 = fadd float %110, %56 %118 = fadd float %112, %57 %119 = fadd float %114, %58 %120 = fadd float %116, %59 %121 = fdiv float 1.000000e+00, %120 %122 = fmul float %121, %117 %123 = fmul float %121, %118 %124 = fsub float -0.000000e+00, %119 %125 = call float @llvm.maxnum.f32(float %124, float 0xBFEFF7CEE0000000) %126 = fmul float %122, %67 %127 = fmul float %123, %68 %128 = fmul float %122, %67 %129 = fadd float %128, -1.000000e+00 %130 = fmul float %123, %68 %131 = fadd float %130, -1.000000e+00 %132 = call float @llvm.AMDIL.fraction.(float %126) %133 = call float @llvm.AMDIL.fraction.(float %127) %134 = fsub float %129, %132 %135 = fsub float %131, %133 %136 = fadd float %134, 2.500000e+00 %137 = fadd float %135, 2.500000e+00 %138 = fdiv float 1.000000e+00, %67 %139 = fdiv float 1.000000e+00, %68 %140 = fmul float %136, %138 %141 = fmul float %137, %139 %142 = bitcast float %140 to i32 %143 = bitcast float %141 to i32 %144 = insertelement <2 x i32> undef, i32 %142, i32 0 %145 = insertelement <2 x i32> %144, i32 %143, i32 1 %146 = bitcast <8 x i32> %75 to <32 x i8> %147 = bitcast <4 x i32> %77 to <16 x i8> %148 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %145, <32 x i8> %146, <16 x i8> %147, i32 2) %149 = extractelement <4 x float> %148, i32 0 %150 = fadd float %125, %149 %151 = fmul float %150, %69 %152 = fadd float %151, 1.000000e+00 %153 = call float @llvm.AMDIL.clamp.(float %152, float 0.000000e+00, float 1.000000e+00) %154 = fadd float %134, 2.500000e+00 %155 = fadd float %135, 5.000000e-01 %156 = fadd float %134, 2.500000e+00 %157 = fadd float %135, 1.500000e+00 %158 = fmul float %138, %154 %159 = fmul float %139, %155 %160 = fmul float %138, %156 %161 = fmul float %139, %157 %162 = bitcast float %158 to i32 %163 = bitcast float %159 to i32 %164 = insertelement <2 x i32> undef, i32 %162, i32 0 %165 = insertelement <2 x i32> %164, i32 %163, i32 1 %166 = bitcast <8 x i32> %75 to <32 x i8> %167 = bitcast <4 x i32> %77 to <16 x i8> %168 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %165, <32 x i8> %166, <16 x i8> %167, i32 2) %169 = extractelement <4 x float> %168, i32 0 %170 = bitcast float %160 to i32 %171 = bitcast float %161 to i32 %172 = insertelement <2 x i32> undef, i32 %170, i32 0 %173 = insertelement <2 x i32> %172, i32 %171, i32 1 %174 = bitcast <8 x i32> %75 to <32 x i8> %175 = bitcast <4 x i32> %77 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 2) %177 = extractelement <4 x float> %176, i32 0 %178 = fadd float %125, %177 %179 = fmul float %178, %69 %180 = fadd float %179, 1.000000e+00 %181 = call float @llvm.AMDIL.clamp.(float %180, float 0.000000e+00, float 1.000000e+00) %182 = fadd float %125, %169 %183 = fmul float %182, %69 %184 = fadd float %183, 1.000000e+00 %185 = call float @llvm.AMDIL.clamp.(float %184, float 0.000000e+00, float 1.000000e+00) %186 = fadd float %134, 5.000000e-01 %187 = fadd float %135, 2.500000e+00 %188 = fadd float %134, 1.500000e+00 %189 = fadd float %135, 5.000000e-01 %190 = fmul float %138, %186 %191 = fmul float %139, %187 %192 = fmul float %138, %188 %193 = fmul float %139, %189 %194 = bitcast float %192 to i32 %195 = bitcast float %193 to i32 %196 = insertelement <2 x i32> undef, i32 %194, i32 0 %197 = insertelement <2 x i32> %196, i32 %195, i32 1 %198 = bitcast <8 x i32> %75 to <32 x i8> %199 = bitcast <4 x i32> %77 to <16 x i8> %200 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %197, <32 x i8> %198, <16 x i8> %199, i32 2) %201 = extractelement <4 x float> %200, i32 0 %202 = bitcast float %190 to i32 %203 = bitcast float %191 to i32 %204 = insertelement <2 x i32> undef, i32 %202, i32 0 %205 = insertelement <2 x i32> %204, i32 %203, i32 1 %206 = bitcast <8 x i32> %75 to <32 x i8> %207 = bitcast <4 x i32> %77 to <16 x i8> %208 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %205, <32 x i8> %206, <16 x i8> %207, i32 2) %209 = extractelement <4 x float> %208, i32 0 %210 = fadd float %125, %209 %211 = fmul float %210, %69 %212 = fadd float %211, 1.000000e+00 %213 = call float @llvm.AMDIL.clamp.(float %212, float 0.000000e+00, float 1.000000e+00) %214 = fadd float %125, %201 %215 = fmul float %214, %69 %216 = fadd float %215, 1.000000e+00 %217 = call float @llvm.AMDIL.clamp.(float %216, float 0.000000e+00, float 1.000000e+00) %218 = fadd float %134, 1.500000e+00 %219 = fadd float %135, 1.500000e+00 %220 = fadd float %134, 1.500000e+00 %221 = fadd float %135, 2.500000e+00 %222 = fadd float %134, 5.000000e-01 %223 = fadd float %135, 5.000000e-01 %224 = fadd float %134, 5.000000e-01 %225 = fadd float %135, 1.500000e+00 %226 = fmul float %138, %222 %227 = fmul float %139, %223 %228 = fmul float %138, %224 %229 = fmul float %139, %225 %230 = fmul float %138, %218 %231 = fmul float %139, %219 %232 = fmul float %138, %220 %233 = fmul float %139, %221 %234 = bitcast float %230 to i32 %235 = bitcast float %231 to i32 %236 = insertelement <2 x i32> undef, i32 %234, i32 0 %237 = insertelement <2 x i32> %236, i32 %235, i32 1 %238 = bitcast <8 x i32> %75 to <32 x i8> %239 = bitcast <4 x i32> %77 to <16 x i8> %240 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %237, <32 x i8> %238, <16 x i8> %239, i32 2) %241 = extractelement <4 x float> %240, i32 0 %242 = bitcast float %232 to i32 %243 = bitcast float %233 to i32 %244 = insertelement <2 x i32> undef, i32 %242, i32 0 %245 = insertelement <2 x i32> %244, i32 %243, i32 1 %246 = bitcast <8 x i32> %75 to <32 x i8> %247 = bitcast <4 x i32> %77 to <16 x i8> %248 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %245, <32 x i8> %246, <16 x i8> %247, i32 2) %249 = extractelement <4 x float> %248, i32 0 %250 = fadd float %125, %249 %251 = fmul float %250, %69 %252 = fadd float %251, 1.000000e+00 %253 = call float @llvm.AMDIL.clamp.(float %252, float 0.000000e+00, float 1.000000e+00) %254 = fadd float %125, %241 %255 = fmul float %254, %69 %256 = fadd float %255, 1.000000e+00 %257 = call float @llvm.AMDIL.clamp.(float %256, float 0.000000e+00, float 1.000000e+00) %258 = call float @llvm.AMDGPU.lrp(float %132, float %185, float %217) %259 = call float @llvm.AMDGPU.lrp(float %132, float %181, float %257) %260 = call float @llvm.AMDGPU.lrp(float %132, float %181, float %257) %261 = call float @llvm.AMDGPU.lrp(float %132, float %153, float %253) %262 = call float @llvm.AMDGPU.lrp(float %133, float %259, float %258) %263 = call float @llvm.AMDGPU.lrp(float %133, float %261, float %260) %264 = bitcast float %226 to i32 %265 = bitcast float %227 to i32 %266 = insertelement <2 x i32> undef, i32 %264, i32 0 %267 = insertelement <2 x i32> %266, i32 %265, i32 1 %268 = bitcast <8 x i32> %75 to <32 x i8> %269 = bitcast <4 x i32> %77 to <16 x i8> %270 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %267, <32 x i8> %268, <16 x i8> %269, i32 2) %271 = extractelement <4 x float> %270, i32 0 %272 = bitcast float %228 to i32 %273 = bitcast float %229 to i32 %274 = insertelement <2 x i32> undef, i32 %272, i32 0 %275 = insertelement <2 x i32> %274, i32 %273, i32 1 %276 = bitcast <8 x i32> %75 to <32 x i8> %277 = bitcast <4 x i32> %77 to <16 x i8> %278 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %275, <32 x i8> %276, <16 x i8> %277, i32 2) %279 = extractelement <4 x float> %278, i32 0 %280 = fadd float %125, %279 %281 = fadd float %125, %271 %282 = fmul float %281, %69 %283 = fadd float %282, 1.000000e+00 %284 = call float @llvm.AMDIL.clamp.(float %283, float 0.000000e+00, float 1.000000e+00) %285 = fmul float %280, %69 %286 = fadd float %285, 1.000000e+00 %287 = call float @llvm.AMDIL.clamp.(float %286, float 0.000000e+00, float 1.000000e+00) %288 = call float @llvm.AMDGPU.lrp(float %132, float %217, float %284) %289 = call float @llvm.AMDGPU.lrp(float %132, float %257, float %287) %290 = call float @llvm.AMDGPU.lrp(float %132, float %257, float %287) %291 = call float @llvm.AMDGPU.lrp(float %132, float %253, float %213) %292 = call float @llvm.AMDGPU.lrp(float %133, float %289, float %288) %293 = call float @llvm.AMDGPU.lrp(float %133, float %291, float %290) %294 = fadd float %293, %292 %295 = fadd float %262, %294 %296 = fadd float %263, %295 %297 = fmul float %296, 2.500000e-01 %298 = call float @llvm.AMDIL.clamp.(float %297, float 0.000000e+00, float 1.000000e+00) %299 = fmul float %298, %298 %300 = fmul float %96, %35 %301 = fmul float %96, %36 %302 = fmul float %96, %37 %303 = fmul float %32, %95 %304 = fadd float %303, %300 %305 = fmul float %33, %95 %306 = fadd float %305, %301 %307 = fmul float %34, %95 %308 = fadd float %307, %302 %309 = fmul float %38, %94 %310 = fadd float %309, %304 %311 = fmul float %39, %94 %312 = fadd float %311, %306 %313 = fmul float %40, %94 %314 = fadd float %313, %308 %315 = fadd float %310, %41 %316 = fadd float %312, %42 %317 = fadd float %314, %43 %318 = fsub float %60, %315 %319 = fsub float %61, %316 %320 = fsub float %62, %317 %321 = fmul float %318, %63 %322 = fmul float %319, %63 %323 = fmul float %320, %63 %324 = fmul float %321, %321 %325 = fmul float %322, %322 %326 = fadd float %325, %324 %327 = fmul float %323, %323 %328 = fadd float %326, %327 %329 = call float @fabs(float %328) %330 = call float @llvm.AMDGPU.rsq.clamped.f32(float %329) %331 = call float @llvm.minnum.f32(float %330, float 0x47EFFFFFE0000000) %332 = fdiv float 1.000000e+00, %331 %333 = fmul float %332, %65 %334 = fadd float %333, %66 %335 = call float @llvm.maxnum.f32(float %334, float 0.000000e+00) %336 = fmul float %335, %335 %337 = fsub float 1.000000e+00, %336 %338 = call float @llvm.maxnum.f32(float %337, float 0.000000e+00) %339 = fadd float %338, 0xBEB0C6F7A0000000 %340 = call float @fabs(float %338) %341 = call float @llvm.pow.f32(float %340, float %64) %342 = fadd float %24, -1.000000e+00 %343 = fadd float %25, -1.000000e+00 %344 = fadd float %26, -1.000000e+00 %345 = fadd float %27, -1.000000e+00 %346 = fmul float %341, %342 %347 = fadd float %346, 1.000000e+00 %348 = fmul float %341, %343 %349 = fadd float %348, 1.000000e+00 %350 = fmul float %341, %344 %351 = fadd float %350, 1.000000e+00 %352 = fmul float %341, %345 %353 = fadd float %352, 1.000000e+00 %354 = call float @llvm.AMDGPU.cndlt(float %339, float 1.000000e+00, float %347) %355 = call float @llvm.AMDGPU.cndlt(float %339, float 1.000000e+00, float %349) %356 = call float @llvm.AMDGPU.cndlt(float %339, float 1.000000e+00, float %351) %357 = call float @llvm.AMDGPU.cndlt(float %339, float 1.000000e+00, float %353) %358 = fsub float 1.000000e+00, %354 %359 = fsub float 1.000000e+00, %355 %360 = fsub float 1.000000e+00, %356 %361 = fsub float 1.000000e+00, %357 %362 = fmul float %299, %358 %363 = fadd float %362, %354 %364 = fmul float %299, %359 %365 = fadd float %364, %355 %366 = fmul float %299, %360 %367 = fadd float %366, %356 %368 = fmul float %299, %361 %369 = fadd float %368, %357 %370 = call i32 @llvm.SI.packf16(float %363, float %365) %371 = bitcast i32 %370 to float %372 = call i32 @llvm.SI.packf16(float %367, float %369) %373 = bitcast i32 %372 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %371, float %373, float %371, float %373) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 v_rcp_f32_e32 v0, v0 ; 7E005500 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[8:11], 0x0 ; C2000900 v_mul_f32_e32 v1, v2, v0 ; 10020102 v_mul_f32_e32 v0, v3, v0 ; 10000103 s_buffer_load_dword s1, s[8:11], 0x1 ; C2008901 s_buffer_load_dword s6, s[8:11], 0x5 ; C2030905 s_buffer_load_dword s2, s[8:11], 0x6 ; C2010906 s_buffer_load_dword s3, s[8:11], 0x7 ; C2018907 s_buffer_load_dword s7, s[8:11], 0x18 ; C2038918 s_buffer_load_dword s36, s[8:11], 0x19 ; C2120919 s_buffer_load_dword s37, s[8:11], 0x28 ; C2128928 s_buffer_load_dword s38, s[8:11], 0x29 ; C2130929 s_buffer_load_dword s39, s[8:11], 0x2a ; C213892A s_buffer_load_dword s40, s[8:11], 0x2b ; C214092B s_buffer_load_dword s41, s[8:11], 0x2c ; C214892C s_buffer_load_dword s42, s[8:11], 0x2d ; C215092D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s2 ; 7E040202 s_buffer_load_dword s43, s[8:11], 0x2e ; C215892E v_mov_b32_e32 v3, s3 ; 7E060203 s_buffer_load_dword s44, s[8:11], 0x2f ; C216092F s_buffer_load_dword s45, s[8:11], 0x30 ; C2168930 s_buffer_load_dword s46, s[8:11], 0x31 ; C2170931 s_buffer_load_dword s47, s[8:11], 0x37 ; C2178937 s_buffer_load_dword s4, s[8:11], 0x38 ; C2020938 s_buffer_load_dword s5, s[8:11], 0x39 ; C2028939 s_buffer_load_dword s3, s[8:11], 0x3a ; C201893A s_buffer_load_dword s2, s[8:11], 0x3b ; C201093B s_buffer_load_dword s48, s[8:11], 0x4 ; C2180904 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s48, v1, v3 ; D2820003 040E0230 v_mad_f32 v4, s6, v0, v2 ; D2820004 040A0006 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[28:35], s[24:27] ; F0800800 00C70203 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, 0x43800000, v2 ; 100404FF 43800000 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mul_f32_e32 v3, s41, v0 ; 10060029 v_mad_f32 v3, s37, v1, v3 ; D2820003 040E0225 s_buffer_load_dword s6, s[8:11], 0x33 ; C2030933 v_mul_f32_e32 v4, s42, v0 ; 1008002A v_mad_f32 v4, s38, v1, v4 ; D2820004 04120226 v_mul_f32_e32 v5, s44, v0 ; 100A002C v_mad_f32 v5, s40, v1, v5 ; D2820005 04160228 s_buffer_load_dword s24, s[8:11], 0x34 ; C20C0934 s_buffer_load_dword s25, s[8:11], 0x35 ; C20C8935 v_mad_f32 v3, s45, v2, v3 ; D2820003 040E042D s_buffer_load_dword s26, s[8:11], 0x36 ; C20D0936 s_buffer_load_dword s27, s[8:11], 0x40 ; C20D8940 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s6, v2, v5 ; D2820005 04160406 v_add_f32_e32 v5, s47, v5 ; 060A0A2F v_rcp_f32_e32 v5, v5 ; 7E0A5505 s_buffer_load_dword s6, s[8:11], 0x41 ; C2030941 v_mad_f32 v4, s46, v2, v4 ; D2820004 0412042E v_add_f32_e32 v3, s24, v3 ; 06060618 v_add_f32_e32 v4, s25, v4 ; 06080819 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mul_f32_e32 v5, s27, v3 ; 100A061B v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v5, v3, s27, -v5 ; D2820005 84143703 v_mad_f32 v3, v3, s27, -1.0 ; D2820003 03CC3703 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s6, v4 ; 100C0806 v_floor_f32_e32 v6, v6 ; 7E0C4906 v_mad_f32 v6, v4, s6, -v6 ; D2820006 84180D04 v_mad_f32 v4, v4, s6, -1.0 ; D2820004 03CC0D04 v_subrev_f32_e32 v3, v5, v3 ; 0A060705 v_subrev_f32_e32 v4, v6, v4 ; 0A080906 v_rcp_f32_e32 v7, s27 ; 7E0E541B v_rcp_f32_e32 v8, s6 ; 7E105406 v_add_f32_e32 v9, 0x40200000, v3 ; 061206FF 40200000 v_add_f32_e32 v10, 0x40200000, v4 ; 061408FF 40200000 v_mul_f32_e32 v11, v7, v9 ; 10161307 v_mul_f32_e32 v12, v8, v10 ; 10181508 image_sample v9, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800100 0064090B v_add_f32_e32 v10, 0.5, v4 ; 061408F0 v_mul_f32_e32 v14, v10, v8 ; 101C110A v_add_f32_e32 v10, 0.5, v3 ; 061406F0 v_mov_b32_e32 v15, v11 ; 7E1E030B v_mov_b32_e32 v16, v12 ; 7E20030C v_mov_b32_e32 v17, v11 ; 7E22030B v_mov_b32_e32 v18, v12 ; 7E24030C v_mul_f32_e32 v11, v10, v7 ; 10160F0A v_mov_b32_e32 v16, v14 ; 7E20030E v_add_f32_e32 v3, 0x3fc00000, v3 ; 060606FF 3FC00000 v_mul_f32_e32 v13, v3, v7 ; 101A0F03 v_add_f32_e32 v3, 0x3fc00000, v4 ; 060608FF 3FC00000 v_mul_f32_e32 v3, v3, v8 ; 10061103 image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800100 0064040F v_mov_b32_e32 v18, v3 ; 7E240303 v_mov_b32_e32 v7, v13 ; 7E0E030D v_mov_b32_e32 v8, v14 ; 7E10030E image_sample v10, 1, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[12:15] ; F0800100 00640A11 v_mov_b32_e32 v8, v12 ; 7E10030C image_sample v15, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[16:23], s[12:15] ; F0800100 00640F0D v_mov_b32_e32 v16, v13 ; 7E20030D v_mov_b32_e32 v17, v14 ; 7E22030E image_sample v13, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800100 00640D0B v_mov_b32_e32 v12, v14 ; 7E18030E v_mov_b32_e32 v17, v3 ; 7E220303 image_sample v14, 1, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[16:23], s[12:15] ; F0800100 00640E10 image_sample v7, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[16:23], s[12:15] ; F0800100 00640707 image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800100 0064080B v_mov_b32_e32 v12, v3 ; 7E180303 image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800100 0064030B s_buffer_load_dword s6, s[8:11], 0x2 ; C2030902 s_buffer_load_dword s12, s[8:11], 0x3 ; C2060903 s_buffer_load_dword s13, s[8:11], 0x32 ; C2068932 s_buffer_load_dword s14, s[8:11], 0x3c ; C207093C s_buffer_load_dword s15, s[8:11], 0x3d ; C207893D s_buffer_load_dword s16, s[8:11], 0x3e ; C208093E s_buffer_load_dword s17, s[8:11], 0x1a ; C208891A s_buffer_load_dword s18, s[8:11], 0x1c ; C209091C s_buffer_load_dword s19, s[8:11], 0x1d ; C209891D s_buffer_load_dword s20, s[8:11], 0x1e ; C20A091E s_buffer_load_dword s21, s[8:11], 0x20 ; C20A8920 s_buffer_load_dword s22, s[8:11], 0x21 ; C20B0921 s_buffer_load_dword s23, s[8:11], 0x22 ; C20B8922 s_buffer_load_dword s24, s[8:11], 0x24 ; C20C0924 s_buffer_load_dword s25, s[8:11], 0x25 ; C20C8925 s_buffer_load_dword s27, s[8:11], 0x26 ; C20D8926 s_buffer_load_dword s8, s[8:11], 0x42 ; C2040942 v_mul_f32_e32 v11, s43, v0 ; 1016002B v_mad_f32 v11, s39, v1, v11 ; D282000B 042E0227 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v12, s18, v0 ; 10180012 v_mul_f32_e32 v16, s19, v0 ; 10200013 v_mul_f32_e32 v0, s20, v0 ; 10000014 v_mad_f32 v12, s7, v1, v12 ; D282000C 04320207 v_mad_f32 v16, s36, v1, v16 ; D2820010 04420224 v_mad_f32 v0, s17, v1, v0 ; D2820000 04020211 v_mad_f32 v1, s13, v2, v11 ; D2820001 042E040D v_mad_f32 v11, s21, v2, v12 ; D282000B 04320415 v_mad_f32 v12, s22, v2, v16 ; D282000C 04420416 v_mad_f32 v0, s23, v2, v0 ; D2820000 04020417 v_add_f32_e32 v1, s26, v1 ; 0602021A v_mov_b32_e32 v2, 0xbf7fbe77 ; 7E0402FF BF7FBE77 v_max_f32_e64 v1, -v1, v2 ; D2200001 20020501 v_add_f32_e32 v2, v9, v1 ; 06040309 v_add_f32_e32 v9, v10, v1 ; 0612030A v_add_f32_e32 v4, v4, v1 ; 06080304 v_add_f32_e32 v10, v13, v1 ; 0614030D v_add_f32_e32 v13, v15, v1 ; 061A030F v_add_f32_e32 v7, v7, v1 ; 060E0307 v_add_f32_e32 v14, v14, v1 ; 061C030E v_add_f32_e32 v3, v3, v1 ; 06060303 v_add_f32_e32 v1, v8, v1 ; 06020308 v_mad_f32 v2, v2, s8, 1.0 ; D2820002 03C81102 v_mad_f32 v8, v9, s8, 1.0 ; D2820008 03C81109 v_mad_f32 v4, v4, s8, 1.0 ; D2820004 03C81104 v_mad_f32 v9, v10, s8, 1.0 ; D2820009 03C8110A v_mad_f32 v10, v13, s8, 1.0 ; D282000A 03C8110D v_mad_f32 v7, v7, s8, 1.0 ; D2820007 03C81107 v_mad_f32 v13, v14, s8, 1.0 ; D282000D 03C8110E v_mad_f32 v1, v1, s8, 1.0 ; D2820001 03C81101 v_mad_f32 v3, v3, s8, 1.0 ; D2820003 03C81103 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_sub_f32_e32 v14, 1.0, v5 ; 081C0AF2 v_mul_f32_e32 v15, v10, v14 ; 101E1D0A v_mad_f32 v4, v5, v4, v15 ; D2820004 043E0905 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_mul_f32_e32 v15, v13, v14 ; 101E1D0D v_mad_f32 v8, v5, v8, v15 ; D2820008 043E1105 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_mul_f32_e32 v15, v7, v14 ; 101E1D07 v_mad_f32 v2, v5, v2, v15 ; D2820002 043E0505 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v1, v1, v14 ; 10021D01 v_mad_f32 v1, v5, v10, v1 ; D2820001 04061505 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mul_f32_e32 v3, v3, v14 ; 10061D03 v_mad_f32 v3, v5, v13, v3 ; D2820003 040E1B05 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_mul_f32_e32 v9, v9, v14 ; 10121D09 v_mad_f32 v5, v5, v7, v9 ; D2820005 04260F05 v_add_f32_e32 v7, s24, v11 ; 060E1618 v_add_f32_e32 v9, s25, v12 ; 06121819 v_add_f32_e32 v0, s27, v0 ; 0600001B v_sub_f32_e32 v7, s4, v7 ; 080E0E04 v_sub_f32_e32 v9, s5, v9 ; 08121205 v_sub_f32_e32 v0, s3, v0 ; 08000003 v_mul_f32_e32 v7, s2, v7 ; 100E0E02 v_mul_f32_e32 v9, s2, v9 ; 10121202 v_mul_f32_e32 v0, s2, v0 ; 10000002 v_sub_f32_e32 v10, 1.0, v6 ; 08140CF2 v_mul_f32_e32 v1, v1, v10 ; 10021501 v_mad_f32 v1, v6, v3, v1 ; D2820001 04060706 v_mad_f32 v1, v10, v3, v1 ; D2820001 0406070A v_mad_f32 v1, v6, v5, v1 ; D2820001 04060B06 v_mul_f32_e32 v3, v7, v7 ; 10060F07 v_mad_f32 v3, v9, v9, v3 ; D2820003 040E1309 v_mad_f32 v0, v0, v0, v3 ; D2820000 040E0100 v_rsq_clamp_f32_e64 v0, |v0| ; D3580100 00000100 v_mad_f32 v1, v10, v4, v1 ; D2820001 0406090A v_mad_f32 v1, v6, v8, v1 ; D2820001 04061106 v_mad_f32 v1, v10, v8, v1 ; D2820001 0406110A v_min_f32_e32 v0, 0x7f7fffff, v0 ; 1E0000FF 7F7FFFFF v_rcp_f32_e32 v0, v0 ; 7E005500 v_mad_f32 v1, v6, v2, v1 ; D2820001 04060506 v_mul_f32_e32 v1, 0x3e800000, v1 ; 100202FF 3E800000 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mov_b32_e32 v2, s16 ; 7E040210 v_mad_f32 v0, s15, v0, v2 ; D2820000 040A000F v_max_f32_e32 v0, 0, v0 ; 20000080 v_mad_f32 v0, -v0, v0, 1.0 ; D2820000 23CA0100 v_max_f32_e32 v0, 0, v0 ; 20000080 v_and_b32_e32 v2, 0x7fffffff, v0 ; 360400FF 7FFFFFFF v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mov_b32_e32 v3, 0xb58637bd ; 7E0602FF B58637BD v_add_f32_e32 v0, v0, v3 ; 06000700 v_mul_legacy_f32_e32 v2, s14, v2 ; 0E04040E v_exp_f32_e32 v2, v2 ; 7E044B02 v_add_f32_e64 v3, -1.0, s0 ; D2060003 000000F3 v_add_f32_e64 v4, -1.0, s1 ; D2060004 000002F3 v_add_f32_e64 v5, -1.0, s6 ; D2060005 00000CF3 v_add_f32_e64 v6, -1.0, s12 ; D2060006 000018F3 v_mad_f32 v3, v2, v3, 1.0 ; D2820003 03CA0702 v_mad_f32 v4, v2, v4, 1.0 ; D2820004 03CA0902 v_mad_f32 v5, v2, v5, 1.0 ; D2820005 03CA0B02 v_mad_f32 v2, v2, v6, 1.0 ; D2820002 03CA0D02 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v3, 1.0, vcc ; D2000000 01A9E503 v_cndmask_b32_e64 v3, v4, 1.0, vcc ; D2000003 01A9E504 v_cndmask_b32_e64 v4, v5, 1.0, vcc ; D2000004 01A9E505 v_cndmask_b32_e64 v2, v2, 1.0, vcc ; D2000002 01A9E502 v_sub_f32_e32 v5, 1.0, v0 ; 080A00F2 v_sub_f32_e32 v6, 1.0, v3 ; 080C06F2 v_sub_f32_e32 v7, 1.0, v4 ; 080E08F2 v_sub_f32_e32 v8, 1.0, v2 ; 081004F2 v_mad_f32 v0, v1, v5, v0 ; D2820000 04020B01 v_mad_f32 v3, v1, v6, v3 ; D2820003 040E0D01 v_mad_f32 v4, v1, v7, v4 ; D2820004 04120F01 v_mad_f32 v1, v1, v8, v2 ; D2820001 040A1101 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 v_cvt_pkrtz_f16_f32_e32 v1, v4, v1 ; 5E020304 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 20 Code Size: 1408 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..6] DCL TEMP[0..1] IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[0], SAMP[0], 2D 1: MUL TEMP[0].xyz, TEMP[0], CONST[6] 2: MAD TEMP[0].xyz, IN[1], TEMP[0], CONST[0] 3: TEX TEMP[1], IN[0], SAMP[1], 2D 4: MUL TEMP[0].w, TEMP[1].xxxx, CONST[6].wwww 5: MUL TEMP[0].w, TEMP[0].wwww, IN[1].wwww 6: MUL OUT[0].xyz, TEMP[0].wwww, TEMP[0] 7: MOV OUT[0].w, IMM[0].xxxx 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %36 = bitcast <8 x i32> addrspace(2)* %35 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %39 = bitcast <4 x i32> addrspace(2)* %38 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %47 = bitcast float %41 to i32 %48 = bitcast float %42 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %32, <16 x i8> %34, i32 2) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = fmul float %52, %27 %56 = fmul float %53, %28 %57 = fmul float %54, %29 %58 = fmul float %43, %55 %59 = fadd float %58, %24 %60 = fmul float %44, %56 %61 = fadd float %60, %25 %62 = fmul float %45, %57 %63 = fadd float %62, %26 %64 = bitcast float %41 to i32 %65 = bitcast float %42 to i32 %66 = insertelement <2 x i32> undef, i32 %64, i32 0 %67 = insertelement <2 x i32> %66, i32 %65, i32 1 %68 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %67, <32 x i8> %37, <16 x i8> %40, i32 2) %69 = extractelement <4 x float> %68, i32 0 %70 = fmul float %69, %30 %71 = fmul float %70, %46 %72 = fmul float %71, %59 %73 = fmul float %71, %61 %74 = fmul float %71, %63 %75 = call i32 @llvm.SI.packf16(float %72, float %73) %76 = bitcast i32 %75 to float %77 = call i32 @llvm.SI.packf16(float %74, float 0.000000e+00) %78 = bitcast i32 %77 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %76, float %78, float %76, float %78) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_buffer_load_dword s13, s[0:3], 0x1a ; C206811A s_buffer_load_dword s0, s[0:3], 0x1b ; C200011B v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[16:19] ; F0800700 00860702 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[20:23] ; F0800100 00A80102 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v2, s11, v7 ; 10040E0B v_mul_f32_e32 v3, s12, v8 ; 1006100C v_mul_f32_e32 v7, s13, v9 ; 100E120D v_mad_f32 v2, v4, v2, s8 ; D2820002 00220504 v_mad_f32 v3, v5, v3, s9 ; D2820003 00260705 v_mad_f32 v4, v6, v7, s10 ; D2820004 002A0F06 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mul_f32_e32 v1, v2, v0 ; 10020102 v_mul_f32_e32 v2, v3, v0 ; 10040103 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 216 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL IN[2], TEXCOORD[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..1] DCL TEMP[0..1] DCL TEMP[2], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 0.0000, -0.0000} IMM[1] FLT32 { 0.0000, 0.7000, 10.0000, -1.7194} IMM[2] FLT32 { 2.0000, 0.5000, 6.2832, -3.1416} IMM[3] FLT32 { 40.0000, -0.1000, 0.1569, -0.1569} IMM[4] FLT32 { 256.0000, 160.0000, -255.0000, 0.0039} IMM[5] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], IMM[0].xxxx, IMM[0].yyyy 1: MAX TEMP[1].xy, |TEMP[0]|, IMM[0].zzzz 2: MUL TEMP[0].xy, TEMP[1], TEMP[1] 3: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 4: RSQ TEMP[2], |TEMP[0].xxxx| 5: MIN TEMP[0].y, IMM[5].xxxx, TEMP[2] 6: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 7: RCP_SAT TEMP[0].y, TEMP[0].yyyy 8: CMP TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx, TEMP[0].yyyy 9: MUL_SAT TEMP[0].y, IMM[1].yyyy, IN[1].xxxx 10: ADD TEMP[0].x, -TEMP[0].yyyy, TEMP[0].xxxx 11: MUL TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 12: MUL TEMP[0].y, |TEMP[0].xxxx|, |TEMP[0].xxxx| 13: ADD TEMP[0].x, |TEMP[0].xxxx|, IMM[0].wwww 14: MUL TEMP[0].y, TEMP[0].yyyy, IMM[1].wwww 15: EX2 TEMP[0].y, TEMP[0].yyyy 16: CMP TEMP[0].x, TEMP[0].xxxx, -IMM[0].yyyy, TEMP[0].yyyy 17: MAD TEMP[0].y, TEMP[0].xxxx, IMM[2].xxxx, IMM[2].yyyy 18: FRC TEMP[0].y, TEMP[0].yyyy 19: MAD TEMP[0].y, TEMP[0].yyyy, IMM[2].zzzz, IMM[2].wwww 20: SCS TEMP[1].y, TEMP[0].yyyy 21: MAD TEMP[0].y, TEMP[1].yyyy, IMM[2].yyyy, IMM[2].yyyy 22: TEX TEMP[1], IN[0], SAMP[1], 2D 23: MAD TEMP[0].zw, TEMP[1].xyxy, IMM[0].xxxx, IMM[0].yyyy 24: LRP TEMP[1].xy, TEMP[0].yyyy, TEMP[0].zwzw, -TEMP[0].zwzw 25: ADD TEMP[0].y, -IMM[0].yyyy, -IN[1].xxxx 26: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy 27: MUL TEMP[0].xy, TEMP[1], TEMP[0].xxxx 28: MUL TEMP[0].zw, TEMP[0].xyxy, IMM[3].xxxx 29: DP2 TEMP[2].x, TEMP[0].zwzw, TEMP[0].zwzw 30: ADD TEMP[1], IMM[3].yyyy, TEMP[2].xxxx 31: KILL_IF TEMP[1] 32: RCP TEMP[0].z, IN[2].wwww 33: MUL TEMP[0].zw, TEMP[0].zzzz, IN[2].xyxy 34: MAD TEMP[0].zw, TEMP[0], CONST[1].xyxy, CONST[1].xywz 35: MAD TEMP[0].zw, TEMP[0].xyxy, IMM[3], TEMP[0] 36: MUL TEMP[0].xy, TEMP[0], IMM[4].yyyy 37: TEX TEMP[1], TEMP[0].zwzw, SAMP[0], 2D 38: MAD TEMP[0].z, TEMP[1].wwww, -IMM[4].xxxx, IN[2].wwww 39: CMP TEMP[0].xy, TEMP[0].zzzz, TEMP[0], IMM[1].xxxx 40: MAX TEMP[1].xy, TEMP[0], IMM[4].zzzz 41: ADD TEMP[0].xy, TEMP[1], IMM[4].zzzz 42: MUL TEMP[0].zw, TEMP[1].xyxy, IMM[4].wwww 43: CMP TEMP[0].xy, TEMP[0], TEMP[0].zwzw, -IMM[0].yyyy 44: MAX OUT[0].xy, TEMP[0], IMM[1].xxxx 45: CMP OUT[0].zw, TEMP[0].xyxy, -TEMP[0].xyxy, -IMM[1].xxxx 46: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)* %34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0 %35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %44 = fmul float %38, 2.000000e+00 %45 = fadd float %44, -1.000000e+00 %46 = fmul float %39, 2.000000e+00 %47 = fadd float %46, -1.000000e+00 %48 = call float @fabs(float %45) %49 = call float @llvm.maxnum.f32(float %48, float 0x3EB0C6F7A0000000) %50 = call float @fabs(float %47) %51 = call float @llvm.maxnum.f32(float %50, float 0x3EB0C6F7A0000000) %52 = fmul float %49, %49 %53 = fmul float %51, %51 %54 = fadd float %53, %52 %55 = call float @fabs(float %54) %56 = call float @llvm.AMDGPU.rsq.clamped.f32(float %55) %57 = call float @llvm.minnum.f32(float %56, float 0x47EFFFFFE0000000) %58 = fadd float %54, 0xBEB0C6F7A0000000 %59 = fdiv float 1.000000e+00, %57 %60 = call float @llvm.AMDIL.clamp.(float %59, float 0.000000e+00, float 1.000000e+00) %61 = call float @llvm.AMDGPU.cndlt(float %58, float 0.000000e+00, float %60) %62 = fmul float %40, 0x3FE6666660000000 %63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00) %64 = fsub float %61, %63 %65 = fmul float %64, 1.000000e+01 %66 = call float @fabs(float %65) %67 = call float @fabs(float %65) %68 = fmul float %66, %67 %69 = call float @fabs(float %65) %70 = fadd float %69, 0xBEB0C6F7A0000000 %71 = fmul float %68, 0xBFFB82D0C0000000 %72 = call float @llvm.AMDIL.exp.(float %71) %73 = call float @llvm.AMDGPU.cndlt(float %70, float 1.000000e+00, float %72) %74 = fmul float %73, 2.000000e+00 %75 = fadd float %74, 5.000000e-01 %76 = call float @llvm.AMDIL.fraction.(float %75) %77 = fmul float %76, 0x401921FB60000000 %78 = fadd float %77, 0xC00921FB60000000 %79 = call float @llvm.sin.f32(float %78) %80 = fmul float %79, 5.000000e-01 %81 = fadd float %80, 5.000000e-01 %82 = bitcast float %38 to i32 %83 = bitcast float %39 to i32 %84 = insertelement <2 x i32> undef, i32 %82, i32 0 %85 = insertelement <2 x i32> %84, i32 %83, i32 1 %86 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %34, <16 x i8> %37, i32 2) %87 = extractelement <4 x float> %86, i32 0 %88 = extractelement <4 x float> %86, i32 1 %89 = fmul float %87, 2.000000e+00 %90 = fadd float %89, -1.000000e+00 %91 = fmul float %88, 2.000000e+00 %92 = fadd float %91, -1.000000e+00 %93 = fsub float -0.000000e+00, %90 %94 = call float @llvm.AMDGPU.lrp(float %81, float %90, float %93) %95 = fsub float -0.000000e+00, %92 %96 = call float @llvm.AMDGPU.lrp(float %81, float %92, float %95) %97 = fsub float 1.000000e+00, %40 %98 = fmul float %73, %97 %99 = fmul float %94, %98 %100 = fmul float %96, %98 %101 = fmul float %99, 4.000000e+01 %102 = fmul float %100, 4.000000e+01 %103 = fmul float %101, %101 %104 = fmul float %102, %102 %105 = fadd float %103, %104 %106 = fadd float %105, 0xBFB99999A0000000 %107 = fadd float %105, 0xBFB99999A0000000 %108 = fadd float %105, 0xBFB99999A0000000 %109 = fadd float %105, 0xBFB99999A0000000 %110 = fcmp olt float %106, 0.000000e+00 %111 = fcmp olt float %107, 0.000000e+00 %112 = fcmp olt float %108, 0.000000e+00 %113 = fcmp olt float %109, 0.000000e+00 %114 = or i1 %113, %112 %115 = or i1 %114, %111 %116 = or i1 %115, %110 %117 = select i1 %116, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %117) %118 = fdiv float 1.000000e+00, %43 %119 = fmul float %118, %41 %120 = fmul float %118, %42 %121 = fmul float %119, %24 %122 = fadd float %121, %27 %123 = fmul float %120, %25 %124 = fadd float %123, %26 %125 = fmul float %99, 0x3FC4141420000000 %126 = fadd float %125, %122 %127 = fmul float %100, 0xBFC4141420000000 %128 = fadd float %127, %124 %129 = fmul float %99, 1.600000e+02 %130 = fmul float %100, 1.600000e+02 %131 = bitcast float %126 to i32 %132 = bitcast float %128 to i32 %133 = insertelement <2 x i32> undef, i32 %131, i32 0 %134 = insertelement <2 x i32> %133, i32 %132, i32 1 %135 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %134, <32 x i8> %29, <16 x i8> %31, i32 2) %136 = extractelement <4 x float> %135, i32 3 %137 = fmul float %136, -2.560000e+02 %138 = fadd float %137, %43 %139 = call float @llvm.AMDGPU.cndlt(float %138, float %129, float 0.000000e+00) %140 = call float @llvm.AMDGPU.cndlt(float %138, float %130, float 0.000000e+00) %141 = call float @llvm.maxnum.f32(float %139, float -2.550000e+02) %142 = call float @llvm.maxnum.f32(float %140, float -2.550000e+02) %143 = fadd float %141, -2.550000e+02 %144 = fadd float %142, -2.550000e+02 %145 = fmul float %141, 0x3F70101020000000 %146 = fmul float %142, 0x3F70101020000000 %147 = call float @llvm.AMDGPU.cndlt(float %143, float %145, float 1.000000e+00) %148 = call float @llvm.AMDGPU.cndlt(float %144, float %146, float 1.000000e+00) %149 = call float @llvm.maxnum.f32(float %147, float 0.000000e+00) %150 = call float @llvm.maxnum.f32(float %148, float 0.000000e+00) %151 = fsub float -0.000000e+00, %147 %152 = call float @llvm.AMDGPU.cndlt(float %147, float %151, float -0.000000e+00) %153 = fsub float -0.000000e+00, %148 %154 = call float @llvm.AMDGPU.cndlt(float %148, float %153, float -0.000000e+00) %155 = call i32 @llvm.SI.packf16(float %149, float %150) %156 = bitcast i32 %155 to float %157 = call i32 @llvm.SI.packf16(float %152, float %154) %158 = bitcast i32 %157 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %156, float %158, float %156, float %158) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x358637bd ; 7E0402FF 358637BD v_mov_b32_e32 v3, 0xb58637bd ; 7E0602FF B58637BD v_mov_b32_e32 v4, 0xbfdc1686 ; 7E0802FF BFDC1686 v_mov_b32_e32 v5, 0x40c90fdb ; 7E0A02FF 40C90FDB s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 v_mad_f32 v11, 2.0, v6, -1.0 ; D282000B 03CE0CF4 v_max_f32_e64 v11, |v11|, v2 ; D220010B 0002050B v_mad_f32 v12, 2.0, v7, -1.0 ; D282000C 03CE0EF4 v_max_f32_e64 v2, |v12|, v2 ; D2200102 0002050C v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mad_f32 v12, v2, v2, v12 ; D282000C 04320502 v_rsq_clamp_f32_e64 v12, |v12| ; D358010C 0000010C v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:7], 3, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800300 00430606 v_min_f32_e32 v1, 0x7f7fffff, v12 ; 1E0218FF 7F7FFFFF v_rcp_f32_e32 v1, v1 ; 7E025501 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 v_mad_f32 v11, v11, v11, v3 ; D282000B 040E170B v_mad_f32 v2, v2, v2, v11 ; D2820002 042E0502 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_mul_f32_e32 v2, 0x3f333333, v8 ; 100410FF 3F333333 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_subrev_f32_e32 v1, v2, v1 ; 0A020302 v_mul_f32_e32 v1, 0x41200000, v1 ; 100202FF 41200000 v_add_f32_e64 v2, |v1|, v3 ; D2060102 00020701 v_mul_f32_e64 v1, |v1|, |v1| ; D2100301 00020301 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_exp_f32_e32 v1, v1 ; 7E024B01 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v1, v1, 1.0, vcc ; D2000001 01A9E501 v_mad_f32 v2, 2.0, v1, 0.5 ; D2820002 03C202F4 v_floor_f32_e32 v3, v2 ; 7E064902 v_subrev_f32_e32 v2, v3, v2 ; 0A040503 v_madak_f32_e32 v2, v2, v5, 0xc0490fdb ; 42040B02 C0490FDB v_mul_f32_e32 v2, 0x3e22f983, v2 ; 100404FF 3E22F983 v_fract_f32_e32 v2, v2 ; 7E044102 v_sin_f32_e32 v2, v2 ; 7E046B02 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, 2.0, v6, -1.0 ; D2820003 03CE0CF4 v_mad_f32 v4, 2.0, v7, -1.0 ; D2820004 03CE0EF4 v_sub_f32_e32 v5, 1.0, v2 ; 080A04F2 v_mul_f32_e32 v6, v3, v5 ; 100C0B03 v_mad_f32 v3, v2, v3, -v6 ; D2820003 841A0702 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 v_mad_f32 v2, v2, v4, -v5 ; D2820002 84160902 v_sub_f32_e32 v4, 1.0, v8 ; 080810F2 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mul_f32_e32 v2, 0x42200000, v3 ; 100406FF 42200000 v_mul_f32_e32 v4, 0x42200000, v1 ; 100802FF 42200000 v_madak_f32_e32 v4, v4, v4, 0xbdcccccd ; 42080904 BDCCCCCD v_mad_f32 v2, v2, v2, v4 ; D2820002 04120502 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, 0, -1.0, vcc ; D2000002 01A9E680 v_cndmask_b32_e64 v2, v2, -1.0, vcc ; D2000002 01A9E702 v_cndmask_b32_e64 v2, v2, -1.0, vcc ; D2000002 01A9E702 v_cndmask_b32_e64 v2, v2, -1.0, vcc ; D2000002 01A9E702 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 v_rcp_f32_e32 v2, v0 ; 7E045500 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_mul_f32_e32 v4, v9, v2 ; 10080509 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s0 ; 7E0A0200 v_mad_f32 v4, s8, v4, v5 ; D2820004 04160808 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mov_b32_e32 v5, s10 ; 7E0A020A v_mad_f32 v2, s9, v2, v5 ; D2820002 04160409 v_madmk_f32_e32 v4, v3, v4, 0x3e20a0a1 ; 40080903 3E20A0A1 v_madmk_f32_e32 v5, v1, v2, 0xbe20a0a1 ; 400A0501 BE20A0A1 v_mul_f32_e32 v2, 0x43200000, v3 ; 100406FF 43200000 v_mul_f32_e32 v1, 0x43200000, v1 ; 100202FF 43200000 image_sample v3, 8, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[12:15] ; F0800800 00640304 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v0, v3, v0, 0xc3800000 ; 40000103 C3800000 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, 0, v2, vcc ; D2000000 01AA0480 v_cndmask_b32_e64 v1, 0, v1, vcc ; D2000001 01AA0280 v_mov_b32_e32 v2, 0xc37f0000 ; 7E0402FF C37F0000 v_max_f32_e32 v0, v2, v0 ; 20000102 v_max_f32_e32 v1, v2, v1 ; 20020302 v_add_f32_e32 v3, v2, v0 ; 06060102 v_add_f32_e32 v2, v2, v1 ; 06040302 v_mul_f32_e32 v0, 0x3b808081, v0 ; 100000FF 3B808081 v_mul_f32_e32 v1, 0x3b808081, v1 ; 100202FF 3B808081 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v0, 1.0, v0, vcc ; D2000000 01AA00F2 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v1, 1.0, v1, vcc ; D2000001 01AA02F2 v_max_f32_e32 v2, 0, v0 ; 20040080 v_max_f32_e32 v3, 0, v1 ; 20060280 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_xor_b32_e32 v5, v0, v4 ; 3A0A0900 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, v4, v5, vcc ; D2000000 01AA0B04 v_xor_b32_e32 v5, v1, v4 ; 3A0A0901 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v4, v5, vcc ; D2000001 01AA0B04 v_cvt_pkrtz_f16_f32_e32 v2, v2, v3 ; 5E040702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 728 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0..1] DCL TEMP[2], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 0.0000, -0.0000} IMM[1] FLT32 { 0.0000, 0.7000, 10.0000, -1.7194} IMM[2] FLT32 { 0.0025, 0.0000, 340282346638528859811704183484516925440.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], IMM[0].xxxx, IMM[0].yyyy 1: MAX TEMP[1].xy, |TEMP[0]|, IMM[0].zzzz 2: MUL TEMP[0].xy, TEMP[1], TEMP[1] 3: ADD TEMP[0].x, TEMP[0].yyyy, TEMP[0].xxxx 4: RSQ TEMP[2], |TEMP[0].xxxx| 5: MIN TEMP[0].y, IMM[2].zzzz, TEMP[2] 6: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 7: RCP_SAT TEMP[0].y, TEMP[0].yyyy 8: CMP TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx, TEMP[0].yyyy 9: MUL_SAT TEMP[0].y, IMM[1].yyyy, IN[1].xxxx 10: ADD TEMP[0].x, -TEMP[0].yyyy, TEMP[0].xxxx 11: MUL TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 12: MUL TEMP[0].y, |TEMP[0].xxxx|, |TEMP[0].xxxx| 13: ADD TEMP[0].x, |TEMP[0].xxxx|, IMM[0].wwww 14: MUL TEMP[0].y, TEMP[0].yyyy, IMM[1].wwww 15: EX2 TEMP[0].y, TEMP[0].yyyy 16: CMP TEMP[0].x, TEMP[0].xxxx, -IMM[0].yyyy, TEMP[0].yyyy 17: MUL TEMP[0].x, TEMP[0].xxxx, IN[1].yyyy 18: MOV TEMP[1].x, IMM[2].xxxx 19: MAD OUT[0].xyz, TEMP[0].xxxx, TEMP[1].xxxx, CONST[0] 20: MOV OUT[0].w, IMM[1].xxxx 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %31 = fmul float %27, 2.000000e+00 %32 = fadd float %31, -1.000000e+00 %33 = fmul float %28, 2.000000e+00 %34 = fadd float %33, -1.000000e+00 %35 = call float @fabs(float %32) %36 = call float @llvm.maxnum.f32(float %35, float 0x3EB0C6F7A0000000) %37 = call float @fabs(float %34) %38 = call float @llvm.maxnum.f32(float %37, float 0x3EB0C6F7A0000000) %39 = fmul float %36, %36 %40 = fmul float %38, %38 %41 = fadd float %40, %39 %42 = call float @fabs(float %41) %43 = call float @llvm.AMDGPU.rsq.clamped.f32(float %42) %44 = call float @llvm.minnum.f32(float %43, float 0x47EFFFFFE0000000) %45 = fadd float %41, 0xBEB0C6F7A0000000 %46 = fdiv float 1.000000e+00, %44 %47 = call float @llvm.AMDIL.clamp.(float %46, float 0.000000e+00, float 1.000000e+00) %48 = call float @llvm.AMDGPU.cndlt(float %45, float 0.000000e+00, float %47) %49 = fmul float %29, 0x3FE6666660000000 %50 = call float @llvm.AMDIL.clamp.(float %49, float 0.000000e+00, float 1.000000e+00) %51 = fsub float %48, %50 %52 = fmul float %51, 1.000000e+01 %53 = call float @fabs(float %52) %54 = call float @fabs(float %52) %55 = fmul float %53, %54 %56 = call float @fabs(float %52) %57 = fadd float %56, 0xBEB0C6F7A0000000 %58 = fmul float %55, 0xBFFB82D0C0000000 %59 = call float @llvm.AMDIL.exp.(float %58) %60 = call float @llvm.AMDGPU.cndlt(float %57, float 1.000000e+00, float %59) %61 = fmul float %60, %30 %62 = fmul float %61, 0x3F647AE140000000 %63 = fadd float %62, %24 %64 = fmul float %61, 0x3F647AE140000000 %65 = fadd float %64, %25 %66 = fmul float %61, 0x3F647AE140000000 %67 = fadd float %66, %26 %68 = call i32 @llvm.SI.packf16(float %63, float %65) %69 = bitcast i32 %68 to float %70 = call i32 @llvm.SI.packf16(float %67, float 0.000000e+00) %71 = bitcast i32 %70 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %69, float %71, float %69, float %71) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mad_f32 v3, 2.0, v3, -1.0 ; D2820003 03CE06F4 v_mov_b32_e32 v5, 0x358637bd ; 7E0A02FF 358637BD v_max_f32_e64 v2, |v2|, v5 ; D2200102 00020B02 v_max_f32_e64 v3, |v3|, v5 ; D2200103 00020B03 v_mul_f32_e32 v5, v2, v2 ; 100A0502 v_mad_f32 v5, v3, v3, v5 ; D2820005 04160703 v_rsq_clamp_f32_e64 v5, |v5| ; D3580105 00000105 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500 v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501 v_min_f32_e32 v1, 0x7f7fffff, v5 ; 1E020AFF 7F7FFFFF v_rcp_f32_e32 v1, v1 ; 7E025501 v_mov_b32_e32 v5, 0xb58637bd ; 7E0A02FF B58637BD v_mad_f32 v2, v2, v2, v5 ; D2820002 04160502 v_mad_f32 v2, v3, v3, v2 ; D2820002 040A0703 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_mul_f32_e32 v2, 0x3f333333, v4 ; 100408FF 3F333333 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_subrev_f32_e32 v1, v2, v1 ; 0A020302 v_mul_f32_e32 v1, 0x41200000, v1 ; 100202FF 41200000 v_mul_f32_e64 v2, |v1|, |v1| ; D2100302 00020301 v_add_f32_e64 v1, |v1|, v5 ; D2060101 00020B01 v_mov_b32_e32 v3, 0xbfdc1686 ; 7E0602FF BFDC1686 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_exp_f32_e32 v2, v2 ; 7E044B02 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v1, v2, 1.0, vcc ; D2000001 01A9E502 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mov_b32_e32 v1, 0x3b23d70a ; 7E0202FF 3B23D70A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, v1, s4 ; D2820002 00120300 v_mad_f32 v3, v0, v1, s5 ; D2820003 00160300 v_mad_f32 v0, v0, v1, s0 ; D2820000 00020300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 308 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[4], PERSPECTIVE DCL IN[1], TEXCOORD[5], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL IN[3], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[0..9] DCL TEMP[0..1] DCL TEMP[2], LOCAL DCL TEMP[3..4] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, -0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 0.0039} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xyz, CONST[6], CONST[6].wwww 1: TEX TEMP[1], IN[3], SAMP[2], 2D 2: MUL TEMP[0].xyz, TEMP[0], TEMP[1] 3: MAD TEMP[0].xyz, TEMP[0], CONST[5].wwww, CONST[5] 4: TEX TEMP[1], IN[3], SAMP[0], 2D 5: MAD TEMP[1].xyz, TEMP[1], IMM[0].xxxx, IMM[0].yyyy 6: DP3 TEMP[2].x, TEMP[1], TEMP[1] 7: RSQ TEMP[2].x, TEMP[2].xxxx 8: MIN TEMP[2].x, IMM[2].xxxx, TEMP[2].xxxx 9: MUL TEMP[3].xyz, TEMP[1], TEMP[2].xxxx 10: DP3 TEMP[2].x, IN[2], IN[2] 11: RSQ TEMP[2].x, TEMP[2].xxxx 12: MIN TEMP[2].x, IMM[2].xxxx, TEMP[2].xxxx 13: MUL TEMP[1].xyz, IN[2], TEMP[2].xxxx 14: DP3 TEMP[0].w, TEMP[3], TEMP[1] 15: MUL TEMP[4].xyz, TEMP[0].wwww, TEMP[3] 16: MAD TEMP[1].xyz, TEMP[4], IMM[0].xxxx, -TEMP[1] 17: DP3 TEMP[2].x, IN[0], IN[0] 18: RSQ TEMP[2].x, TEMP[2].xxxx 19: MIN TEMP[2].x, IMM[2].xxxx, TEMP[2].xxxx 20: MUL TEMP[4].xyz, IN[0], TEMP[2].xxxx 21: DP3_SAT TEMP[0].w, TEMP[1], TEMP[4] 22: DP3_SAT TEMP[1].x, TEMP[3], TEMP[4] 23: POW TEMP[1].y, |TEMP[0].wwww|, CONST[7].xxxx 24: ADD TEMP[0].w, TEMP[0].wwww, IMM[0].wwww 25: MOV TEMP[3].x, IMM[1].xxxx 26: ADD TEMP[1].z, TEMP[3].xxxx, CONST[7].xxxx 27: MUL TEMP[1].y, TEMP[1].zzzz, TEMP[1].yyyy 28: MUL TEMP[1].y, TEMP[1].yyyy, IMM[1].yyyy 29: MUL TEMP[0].xyz, TEMP[0], TEMP[1].yyyy 30: CMP TEMP[0].xyz, TEMP[0].wwww, IMM[1].zzzz, TEMP[0] 31: ADD TEMP[0].w, TEMP[1].xxxx, IMM[0].wwww 32: MOV TEMP[1].z, IMM[0].zzzz 33: ADD TEMP[1].yzw, TEMP[1].zzzz, -CONST[0].xxyz 34: TEX TEMP[3], IN[3], SAMP[1], 2D 35: MUL TEMP[1].yzw, TEMP[1], TEMP[3].xxyz 36: MAD TEMP[1].yzw, TEMP[1], CONST[4].wwww, CONST[4].xxyz 37: MUL TEMP[3].xyz, TEMP[1].xxxx, TEMP[1].yzww 38: CMP TEMP[3].xyz, TEMP[0].wwww, IMM[1].zzzz, TEMP[3] 39: ADD TEMP[0].xyz, TEMP[0], TEMP[3] 40: MOV TEMP[3].xyz, CONST[0] 41: MAD TEMP[0].xyz, TEMP[0], CONST[8], TEMP[3] 42: MAD OUT[0].xyz, TEMP[1].yzww, CONST[9], TEMP[0] 43: MUL OUT[0].w, IMM[1].wwww, IN[1].wwww 44: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %46 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %47 = load <32 x i8>, <32 x i8> addrspace(2)* %46, align 32, !tbaa !0 %48 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %51 = bitcast <8 x i32> addrspace(2)* %50 to <32 x i8> addrspace(2)* %52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %54 = bitcast <4 x i32> addrspace(2)* %53 to <16 x i8> addrspace(2)* %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %57 = bitcast <8 x i32> addrspace(2)* %56 to <32 x i8> addrspace(2)* %58 = load <32 x i8>, <32 x i8> addrspace(2)* %57, align 32, !tbaa !0 %59 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %60 = bitcast <4 x i32> addrspace(2)* %59 to <16 x i8> addrspace(2)* %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %71 = fmul float %35, %38 %72 = fmul float %36, %38 %73 = fmul float %37, %38 %74 = bitcast float %69 to i32 %75 = bitcast float %70 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %58, <16 x i8> %61, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = fmul float %71, %79 %83 = fmul float %72, %80 %84 = fmul float %73, %81 %85 = fmul float %82, %34 %86 = fadd float %85, %31 %87 = fmul float %83, %34 %88 = fadd float %87, %32 %89 = fmul float %84, %34 %90 = fadd float %89, %33 %91 = bitcast float %69 to i32 %92 = bitcast float %70 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %47, <16 x i8> %49, i32 2) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = fmul float %96, 2.000000e+00 %100 = fadd float %99, -1.000000e+00 %101 = fmul float %97, 2.000000e+00 %102 = fadd float %101, -1.000000e+00 %103 = fmul float %98, 2.000000e+00 %104 = fadd float %103, -1.000000e+00 %105 = fmul float %100, %100 %106 = fmul float %102, %102 %107 = fadd float %106, %105 %108 = fmul float %104, %104 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) %111 = call float @llvm.minnum.f32(float %110, float 0x47EFFFFFE0000000) %112 = fmul float %100, %111 %113 = fmul float %102, %111 %114 = fmul float %104, %111 %115 = fmul float %66, %66 %116 = fmul float %67, %67 %117 = fadd float %116, %115 %118 = fmul float %68, %68 %119 = fadd float %117, %118 %120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119) %121 = call float @llvm.minnum.f32(float %120, float 0x47EFFFFFE0000000) %122 = fmul float %66, %121 %123 = fmul float %67, %121 %124 = fmul float %68, %121 %125 = fmul float %112, %122 %126 = fmul float %113, %123 %127 = fadd float %126, %125 %128 = fmul float %114, %124 %129 = fadd float %127, %128 %130 = fmul float %129, %112 %131 = fmul float %129, %113 %132 = fmul float %129, %114 %133 = fmul float %130, 2.000000e+00 %134 = fsub float %133, %122 %135 = fmul float %131, 2.000000e+00 %136 = fsub float %135, %123 %137 = fmul float %132, 2.000000e+00 %138 = fsub float %137, %124 %139 = fmul float %62, %62 %140 = fmul float %63, %63 %141 = fadd float %140, %139 %142 = fmul float %64, %64 %143 = fadd float %141, %142 %144 = call float @llvm.AMDGPU.rsq.clamped.f32(float %143) %145 = call float @llvm.minnum.f32(float %144, float 0x47EFFFFFE0000000) %146 = fmul float %62, %145 %147 = fmul float %63, %145 %148 = fmul float %64, %145 %149 = fmul float %134, %146 %150 = fmul float %136, %147 %151 = fadd float %150, %149 %152 = fmul float %138, %148 %153 = fadd float %151, %152 %154 = call float @llvm.AMDIL.clamp.(float %153, float 0.000000e+00, float 1.000000e+00) %155 = fmul float %112, %146 %156 = fmul float %113, %147 %157 = fadd float %156, %155 %158 = fmul float %114, %148 %159 = fadd float %157, %158 %160 = call float @llvm.AMDIL.clamp.(float %159, float 0.000000e+00, float 1.000000e+00) %161 = call float @fabs(float %154) %162 = call float @llvm.pow.f32(float %161, float %39) %163 = fadd float %154, 0xBEB0C6F7A0000000 %164 = fadd float %39, 8.000000e+00 %165 = fmul float %164, %162 %166 = fmul float %165, 0x3FA45F3060000000 %167 = fmul float %86, %166 %168 = fmul float %88, %166 %169 = fmul float %90, %166 %170 = call float @llvm.AMDGPU.cndlt(float %163, float 0.000000e+00, float %167) %171 = call float @llvm.AMDGPU.cndlt(float %163, float 0.000000e+00, float %168) %172 = call float @llvm.AMDGPU.cndlt(float %163, float 0.000000e+00, float %169) %173 = fadd float %160, 0xBEB0C6F7A0000000 %174 = fsub float 1.000000e+00, %24 %175 = fsub float 1.000000e+00, %25 %176 = fsub float 1.000000e+00, %26 %177 = bitcast float %69 to i32 %178 = bitcast float %70 to i32 %179 = insertelement <2 x i32> undef, i32 %177, i32 0 %180 = insertelement <2 x i32> %179, i32 %178, i32 1 %181 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %180, <32 x i8> %52, <16 x i8> %55, i32 2) %182 = extractelement <4 x float> %181, i32 0 %183 = extractelement <4 x float> %181, i32 1 %184 = extractelement <4 x float> %181, i32 2 %185 = fmul float %174, %182 %186 = fmul float %175, %183 %187 = fmul float %176, %184 %188 = fmul float %185, %30 %189 = fadd float %188, %27 %190 = fmul float %186, %30 %191 = fadd float %190, %28 %192 = fmul float %187, %30 %193 = fadd float %192, %29 %194 = fmul float %160, %189 %195 = fmul float %160, %191 %196 = fmul float %160, %193 %197 = call float @llvm.AMDGPU.cndlt(float %173, float 0.000000e+00, float %194) %198 = call float @llvm.AMDGPU.cndlt(float %173, float 0.000000e+00, float %195) %199 = call float @llvm.AMDGPU.cndlt(float %173, float 0.000000e+00, float %196) %200 = fadd float %170, %197 %201 = fadd float %171, %198 %202 = fadd float %172, %199 %203 = fmul float %200, %40 %204 = fadd float %203, %24 %205 = fmul float %201, %41 %206 = fadd float %205, %25 %207 = fmul float %202, %42 %208 = fadd float %207, %26 %209 = fmul float %189, %43 %210 = fadd float %209, %204 %211 = fmul float %191, %44 %212 = fadd float %211, %206 %213 = fmul float %193, %45 %214 = fadd float %213, %208 %215 = fmul float %65, 3.906250e-03 %216 = call i32 @llvm.SI.packf16(float %210, float %212) %217 = bitcast i32 %216 to float %218 = call i32 @llvm.SI.packf16(float %214, float %215) %219 = bitcast i32 %218 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %217, float %219, float %217, float %219) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s36, s[8:11], 0x0 ; C2120900 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 s_buffer_load_dword s3, s[8:11], 0x1 ; C2018901 s_buffer_load_dword s2, s[8:11], 0x2 ; C2010902 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 s_buffer_load_dword s0, s[8:11], 0x10 ; C2000910 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 s_buffer_load_dword s1, s[8:11], 0x11 ; C2008911 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v9, 1.0, s36 ; D2080009 000048F2 s_buffer_load_dword s37, s[8:11], 0x12 ; C2128912 v_sub_f32_e64 v10, 1.0, s3 ; D208000A 000006F2 v_sub_f32_e64 v11, 1.0, s2 ; D208000B 000004F2 s_buffer_load_dword s38, s[8:11], 0x13 ; C2130913 s_buffer_load_dword s39, s[8:11], 0x14 ; C2138914 s_buffer_load_dword s40, s[8:11], 0x15 ; C2140915 s_buffer_load_dword s41, s[8:11], 0x16 ; C2148916 s_buffer_load_dword s42, s[8:11], 0x17 ; C2150917 s_buffer_load_dword s43, s[8:11], 0x18 ; C2158918 s_buffer_load_dword s44, s[8:11], 0x19 ; C2160919 s_buffer_load_dword s45, s[8:11], 0x1a ; C216891A s_buffer_load_dword s46, s[8:11], 0x1b ; C217091B s_buffer_load_dword s47, s[8:11], 0x1c ; C217891C s_buffer_load_dword s48, s[8:11], 0x20 ; C2180920 s_buffer_load_dword s49, s[8:11], 0x21 ; C2188921 s_buffer_load_dword s50, s[8:11], 0x22 ; C2190922 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[52:55], s[4:5], 0x8 ; C09A0508 s_load_dwordx8 s[56:63], s[6:7], 0x10 ; C0DC0710 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[56:63], s[52:55] ; F0800700 01AE0E0C image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[28:35], s[24:27] ; F0800700 00C7110C image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[16:23], s[12:15] ; F0800700 0064140C s_buffer_load_dword s4, s[8:11], 0x24 ; C2020924 s_buffer_load_dword s5, s[8:11], 0x25 ; C2028925 s_buffer_load_dword s6, s[8:11], 0x26 ; C2030926 v_mov_b32_e32 v0, s46 ; 7E00022E v_mul_f32_e32 v0, s43, v0 ; 1000002B v_mov_b32_e32 v1, s46 ; 7E02022E v_mul_f32_e32 v1, s44, v1 ; 1002022C v_mov_b32_e32 v12, s46 ; 7E18022E v_mul_f32_e32 v12, s45, v12 ; 1018182D s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v0, v14, v0 ; 1000010E v_mul_f32_e32 v1, v15, v1 ; 1002030F v_mul_f32_e32 v12, v16, v12 ; 10181910 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v20, v9 ; 10121314 v_mul_f32_e32 v10, v21, v10 ; 10141515 v_mul_f32_e32 v11, v22, v11 ; 10161716 v_mov_b32_e32 v13, s39 ; 7E1A0227 v_mad_f32 v0, s42, v0, v13 ; D2820000 0436002A v_mov_b32_e32 v13, s40 ; 7E1A0228 v_mad_f32 v1, s42, v1, v13 ; D2820001 0436022A v_mov_b32_e32 v13, s41 ; 7E1A0229 v_mad_f32 v12, s42, v12, v13 ; D282000C 0436182A v_mad_f32 v13, 2.0, v17, -1.0 ; D282000D 03CE22F4 v_mad_f32 v14, 2.0, v18, -1.0 ; D282000E 03CE24F4 v_mad_f32 v15, 2.0, v19, -1.0 ; D282000F 03CE26F4 v_mov_b32_e32 v16, s0 ; 7E200200 v_mul_f32_e32 v17, v13, v13 ; 10221B0D v_mad_f32 v17, v14, v14, v17 ; D2820011 04461D0E v_mad_f32 v17, v15, v15, v17 ; D2820011 04461F0F v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_mad_f32 v9, s38, v9, v16 ; D2820009 04421226 v_mov_b32_e32 v16, s1 ; 7E200201 v_mad_f32 v10, s38, v10, v16 ; D282000A 04421426 v_mov_b32_e32 v16, s37 ; 7E200225 v_mad_f32 v11, s38, v11, v16 ; D282000B 04421626 v_min_f32_e32 v16, 0x7f7fffff, v17 ; 1E2022FF 7F7FFFFF v_mul_f32_e32 v17, v6, v6 ; 10220D06 v_mad_f32 v17, v7, v7, v17 ; D2820011 04460F07 v_mad_f32 v17, v8, v8, v17 ; D2820011 04461108 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_mul_f32_e32 v14, v16, v14 ; 101C1D10 v_mul_f32_e32 v15, v16, v15 ; 101E1F10 v_min_f32_e32 v16, 0x7f7fffff, v17 ; 1E2022FF 7F7FFFFF v_mul_f32_e32 v17, v16, v6 ; 10220D10 v_mul_f32_e32 v17, v17, v13 ; 10221B11 v_mul_f32_e32 v18, v16, v7 ; 10240F10 v_mad_f32 v17, v14, v18, v17 ; D2820011 0446250E v_mul_f32_e32 v18, v16, v8 ; 10241110 v_mad_f32 v17, v15, v18, v17 ; D2820011 0446250F v_mul_f32_e32 v18, v13, v17 ; 1024230D v_mad_f32 v18, v17, v13, v18 ; D2820012 044A1B11 v_mad_f32 v6, -v6, v16, v18 ; D2820006 244A2106 v_mul_f32_e32 v18, v14, v17 ; 1024230E v_mad_f32 v18, v17, v14, v18 ; D2820012 044A1D11 v_mad_f32 v7, -v7, v16, v18 ; D2820007 244A2107 v_mul_f32_e32 v18, v2, v2 ; 10240502 v_mad_f32 v18, v3, v3, v18 ; D2820012 044A0703 v_mad_f32 v18, v4, v4, v18 ; D2820012 044A0904 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mul_f32_e32 v19, v15, v17 ; 1026230F v_mad_f32 v17, v17, v15, v19 ; D2820011 044E1F11 v_mad_f32 v8, -v8, v16, v17 ; D2820008 24462108 v_min_f32_e32 v16, 0x7f7fffff, v18 ; 1E2024FF 7F7FFFFF v_mul_f32_e32 v2, v16, v2 ; 10040510 v_mul_f32_e32 v3, v16, v3 ; 10060710 v_mul_f32_e32 v4, v16, v4 ; 10080910 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v7, v3, v6 ; D2820006 041A0707 v_mad_f32 v6, v8, v4, v6 ; D2820006 041A0908 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_and_b32_e32 v7, 0x7fffffff, v6 ; 360E0CFF 7FFFFFFF v_log_f32_e32 v7, v7 ; 7E0E4F07 v_mul_f32_e32 v2, v2, v13 ; 10041B02 v_mad_f32 v2, v14, v3, v2 ; D2820002 040A070E v_mad_f32 v2, v15, v4, v2 ; D2820002 040A090F v_mul_legacy_f32_e32 v3, s47, v7 ; 0E060E2F v_mov_b32_e32 v4, 0x41000000 ; 7E0802FF 41000000 v_add_f32_e32 v4, s47, v4 ; 0608082F v_exp_f32_e32 v3, v3 ; 7E064B03 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mov_b32_e32 v4, 0xb58637bd ; 7E0802FF B58637BD v_add_f32_e32 v6, v4, v6 ; 060C0D04 v_mul_f32_e32 v3, 0x3d22f983, v3 ; 100606FF 3D22F983 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_add_f32_e32 v4, v4, v2 ; 06080504 v_mul_f32_e32 v6, v9, v2 ; 100C0509 v_cmp_gt_f32_e64 s[0:1], 0, v4 ; D0080000 00020880 v_cndmask_b32_e64 v4, v6, 0, s[0:1] ; D2000004 00010106 v_add_f32_e32 v0, v4, v0 ; 06000104 v_mov_b32_e32 v4, s36 ; 7E080224 v_mad_f32 v0, s48, v0, v4 ; D2820000 04120030 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v3, v3, v12 ; 10061903 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_mul_f32_e32 v4, v10, v2 ; 1008050A v_mul_f32_e32 v2, v11, v2 ; 1004050B v_cndmask_b32_e64 v4, v4, 0, s[0:1] ; D2000004 00010104 v_cndmask_b32_e64 v2, v2, 0, s[0:1] ; D2000002 00010102 v_add_f32_e32 v1, v4, v1 ; 06020304 v_mov_b32_e32 v4, s3 ; 7E080203 v_mad_f32 v1, s49, v1, v4 ; D2820001 04120231 v_add_f32_e32 v2, v2, v3 ; 06040702 v_mov_b32_e32 v3, s2 ; 7E060202 v_mad_f32 v2, s50, v2, v3 ; D2820002 040E0432 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v9, s4, v0 ; D2820000 04000909 v_mad_f32 v1, v10, s5, v1 ; D2820001 04040B0A v_mad_f32 v2, v11, s6, v2 ; D2820002 04080D0B v_mul_f32_e32 v3, 0x3b800000, v5 ; 10060AFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 24 Code Size: 924 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], TEXCOORD[0] DCL OUT[3], TEXCOORD[4] DCL OUT[4], TEXCOORD[5] DCL OUT[5], TEXCOORD[6] DCL OUT[6].xyz, TEXCOORD[7] DCL CONST[0..13] DCL TEMP[0..5] IMM[0] FLT32 { 0.0078, -1.0000, 1.0000, 0.0000} 0: MUL TEMP[0], CONST[7], IN[0].yyyy 1: MAD TEMP[0], CONST[6], IN[0].xxxx, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[9], IN[0].wwww, TEMP[0] 4: MAD TEMP[1].xyz, TEMP[0], -CONST[4].wwww, CONST[4] 5: MUL TEMP[2].xyz, TEMP[1].yyyy, CONST[11] 6: MAD TEMP[1].xyw, CONST[10].xyzz, TEMP[1].xxxx, TEMP[2].xyzz 7: MAD TEMP[1].xyz, CONST[12], TEMP[1].zzzz, TEMP[1].xyww 8: MAD TEMP[2].xyz, IN[1].yzxw, IMM[0].xxxx, IMM[0].yyyy 9: MAD TEMP[3], IN[2], IMM[0].xxxx, IMM[0].yyyy 10: MUL TEMP[4].xyz, TEMP[2], TEMP[3].zxyw 11: MAD TEMP[2].xyz, TEMP[3].yzxw, TEMP[2].yzxw, -TEMP[4] 12: MUL TEMP[2].xyz, TEMP[3].wwww, TEMP[2] 13: MUL TEMP[4].xyz, TEMP[3].yzxw, TEMP[2].zxyw 14: MAD TEMP[4].xyz, TEMP[2].yzxw, TEMP[3].zxyw, -TEMP[4] 15: MUL TEMP[4].xyz, TEMP[3].wwww, TEMP[4] 16: DP3 OUT[5].x, TEMP[4], TEMP[1] 17: DP3 OUT[5].y, TEMP[2], TEMP[1] 18: DP3 OUT[5].z, TEMP[3], TEMP[1] 19: MOV TEMP[1].xyz, CONST[13] 20: MUL TEMP[5].xyz, TEMP[1].yyyy, CONST[11] 21: MAD TEMP[1].xyw, CONST[10].xyzz, TEMP[1].xxxx, TEMP[5].xyzz 22: MAD TEMP[1].xyz, CONST[12], TEMP[1].zzzz, TEMP[1].xyww 23: DP3 OUT[3].x, TEMP[4], TEMP[1] 24: DP3 OUT[6].x, TEMP[4], CONST[12] 25: DP3 OUT[3].y, TEMP[2], TEMP[1] 26: DP3 OUT[6].y, TEMP[2], CONST[12] 27: DP3 OUT[3].z, TEMP[3], TEMP[1] 28: DP3 OUT[6].z, TEMP[3], CONST[12] 29: MOV OUT[1], IN[3] 30: MUL OUT[2], IMM[0].zzww, IN[4].xyxx 31: MOV OUT[3].w, IMM[0].wwww 32: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 33: MAD TEMP[1], CONST[0], TEMP[0].xxxx, TEMP[1] 34: MAD TEMP[1], CONST[2], TEMP[0].zzzz, TEMP[1] 35: MAD TEMP[0], CONST[3], TEMP[0].wwww, TEMP[1] 36: MOV OUT[4], TEMP[0] 37: MOV OUT[0], TEMP[0] 38: MOV OUT[5].w, IMM[0].zzzz 39: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = add i32 %5, %7 %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %62, i32 0, i32 %63) %65 = extractelement <4 x float> %64, i32 0 %66 = extractelement <4 x float> %64, i32 1 %67 = extractelement <4 x float> %64, i32 2 %68 = extractelement <4 x float> %64, i32 3 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = add i32 %5, %7 %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %78) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = extractelement <4 x float> %79, i32 3 %84 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0 %86 = add i32 %5, %7 %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %86) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = extractelement <4 x float> %87, i32 2 %91 = extractelement <4 x float> %87, i32 3 %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 %94 = add i32 %5, %7 %95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %94) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = fmul float %37, %66 %99 = fmul float %38, %66 %100 = fmul float %39, %66 %101 = fmul float %40, %66 %102 = fmul float %33, %65 %103 = fadd float %102, %98 %104 = fmul float %34, %65 %105 = fadd float %104, %99 %106 = fmul float %35, %65 %107 = fadd float %106, %100 %108 = fmul float %36, %65 %109 = fadd float %108, %101 %110 = fmul float %41, %67 %111 = fadd float %110, %103 %112 = fmul float %42, %67 %113 = fadd float %112, %105 %114 = fmul float %43, %67 %115 = fadd float %114, %107 %116 = fmul float %44, %67 %117 = fadd float %116, %109 %118 = fmul float %45, %68 %119 = fadd float %118, %111 %120 = fmul float %46, %68 %121 = fadd float %120, %113 %122 = fmul float %47, %68 %123 = fadd float %122, %115 %124 = fmul float %48, %68 %125 = fadd float %124, %117 %126 = fmul float %32, %119 %127 = fsub float %29, %126 %128 = fmul float %32, %121 %129 = fsub float %30, %128 %130 = fmul float %32, %123 %131 = fsub float %31, %130 %132 = fmul float %129, %52 %133 = fmul float %129, %53 %134 = fmul float %129, %54 %135 = fmul float %49, %127 %136 = fadd float %135, %132 %137 = fmul float %50, %127 %138 = fadd float %137, %133 %139 = fmul float %51, %127 %140 = fadd float %139, %134 %141 = fmul float %55, %131 %142 = fadd float %141, %136 %143 = fmul float %56, %131 %144 = fadd float %143, %138 %145 = fmul float %57, %131 %146 = fadd float %145, %140 %147 = fmul float %74, 0x3F80101020000000 %148 = fadd float %147, -1.000000e+00 %149 = fmul float %75, 0x3F80101020000000 %150 = fadd float %149, -1.000000e+00 %151 = fmul float %73, 0x3F80101020000000 %152 = fadd float %151, -1.000000e+00 %153 = fmul float %80, 0x3F80101020000000 %154 = fadd float %153, -1.000000e+00 %155 = fmul float %81, 0x3F80101020000000 %156 = fadd float %155, -1.000000e+00 %157 = fmul float %82, 0x3F80101020000000 %158 = fadd float %157, -1.000000e+00 %159 = fmul float %83, 0x3F80101020000000 %160 = fadd float %159, -1.000000e+00 %161 = fmul float %148, %158 %162 = fmul float %150, %154 %163 = fmul float %152, %156 %164 = fmul float %156, %150 %165 = fsub float %164, %161 %166 = fmul float %158, %152 %167 = fsub float %166, %162 %168 = fmul float %154, %148 %169 = fsub float %168, %163 %170 = fmul float %160, %165 %171 = fmul float %160, %167 %172 = fmul float %160, %169 %173 = fmul float %156, %172 %174 = fmul float %158, %170 %175 = fmul float %154, %171 %176 = fmul float %171, %158 %177 = fsub float %176, %173 %178 = fmul float %172, %154 %179 = fsub float %178, %174 %180 = fmul float %170, %156 %181 = fsub float %180, %175 %182 = fmul float %160, %177 %183 = fmul float %160, %179 %184 = fmul float %160, %181 %185 = fmul float %182, %142 %186 = fmul float %183, %144 %187 = fadd float %186, %185 %188 = fmul float %184, %146 %189 = fadd float %187, %188 %190 = fmul float %170, %142 %191 = fmul float %171, %144 %192 = fadd float %191, %190 %193 = fmul float %172, %146 %194 = fadd float %192, %193 %195 = fmul float %154, %142 %196 = fmul float %156, %144 %197 = fadd float %196, %195 %198 = fmul float %158, %146 %199 = fadd float %197, %198 %200 = fmul float %59, %52 %201 = fmul float %59, %53 %202 = fmul float %59, %54 %203 = fmul float %49, %58 %204 = fadd float %203, %200 %205 = fmul float %50, %58 %206 = fadd float %205, %201 %207 = fmul float %51, %58 %208 = fadd float %207, %202 %209 = fmul float %55, %60 %210 = fadd float %209, %204 %211 = fmul float %56, %60 %212 = fadd float %211, %206 %213 = fmul float %57, %60 %214 = fadd float %213, %208 %215 = fmul float %182, %210 %216 = fmul float %183, %212 %217 = fadd float %216, %215 %218 = fmul float %184, %214 %219 = fadd float %217, %218 %220 = fmul float %182, %55 %221 = fmul float %183, %56 %222 = fadd float %221, %220 %223 = fmul float %184, %57 %224 = fadd float %222, %223 %225 = fmul float %170, %210 %226 = fmul float %171, %212 %227 = fadd float %226, %225 %228 = fmul float %172, %214 %229 = fadd float %227, %228 %230 = fmul float %170, %55 %231 = fmul float %171, %56 %232 = fadd float %231, %230 %233 = fmul float %172, %57 %234 = fadd float %232, %233 %235 = fmul float %154, %210 %236 = fmul float %156, %212 %237 = fadd float %236, %235 %238 = fmul float %158, %214 %239 = fadd float %237, %238 %240 = fmul float %154, %55 %241 = fmul float %156, %56 %242 = fadd float %241, %240 %243 = fmul float %158, %57 %244 = fadd float %242, %243 %245 = fmul float %96, 0.000000e+00 %246 = fmul float %96, 0.000000e+00 %247 = fmul float %121, %17 %248 = fmul float %121, %18 %249 = fmul float %121, %19 %250 = fmul float %121, %20 %251 = fmul float %13, %119 %252 = fadd float %251, %247 %253 = fmul float %14, %119 %254 = fadd float %253, %248 %255 = fmul float %15, %119 %256 = fadd float %255, %249 %257 = fmul float %16, %119 %258 = fadd float %257, %250 %259 = fmul float %21, %123 %260 = fadd float %259, %252 %261 = fmul float %22, %123 %262 = fadd float %261, %254 %263 = fmul float %23, %123 %264 = fadd float %263, %256 %265 = fmul float %24, %123 %266 = fadd float %265, %258 %267 = fmul float %25, %125 %268 = fadd float %267, %260 %269 = fmul float %26, %125 %270 = fadd float %269, %262 %271 = fmul float %27, %125 %272 = fadd float %271, %264 %273 = fmul float %28, %125 %274 = fadd float %273, %266 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float %90, float %91) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %96, float %97, float %245, float %246) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %219, float %229, float %239, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %268, float %270, float %272, float %274) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %189, float %194, float %199, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %224, float %234, float %244, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %268, float %270, float %272, float %274) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x3c008081 ; 7E0202FF 3C008081 v_mov_b32_e32 v2, 0 ; 7E040280 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[2:3], 0x0 ; C0820300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[24:27], s[8:9], 0x10 ; C08C0910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s28, s[4:7], 0x1d ; C20E051D buffer_load_format_xyzw v[4:7], v0, s[0:3], 0 idxen ; E00C2000 80000400 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 buffer_load_format_xyzw v[15:18], v0, s[20:23], 0 idxen ; E00C2000 80050F00 s_buffer_load_dword s12, s[4:7], 0x1e ; C206051E s_buffer_load_dword s13, s[4:7], 0x1f ; C206851F s_buffer_load_dword s14, s[4:7], 0x20 ; C2070520 s_buffer_load_dword s15, s[4:7], 0x21 ; C2078521 s_buffer_load_dword s16, s[4:7], 0x18 ; C2080518 s_buffer_load_dword s17, s[4:7], 0x19 ; C2088519 s_buffer_load_dword s18, s[4:7], 0x1a ; C209051A s_buffer_load_dword s19, s[4:7], 0x1b ; C209851B s_buffer_load_dword s20, s[4:7], 0x1c ; C20A051C s_buffer_load_dword s21, s[4:7], 0x22 ; C20A8522 s_buffer_load_dword s22, s[4:7], 0x23 ; C20B0523 s_buffer_load_dword s23, s[4:7], 0x24 ; C20B8524 s_buffer_load_dword s29, s[4:7], 0x25 ; C20E8525 s_buffer_load_dword s30, s[4:7], 0x26 ; C20F0526 s_buffer_load_dword s8, s[4:7], 0xf ; C204050F s_buffer_load_dword s0, s[4:7], 0x10 ; C2000510 s_buffer_load_dword s1, s[4:7], 0x11 ; C2008511 s_buffer_load_dword s31, s[4:7], 0x12 ; C20F8512 s_buffer_load_dword s9, s[4:7], 0x13 ; C2048513 s_buffer_load_dword s32, s[4:7], 0x27 ; C2100527 s_buffer_load_dword s33, s[4:7], 0x28 ; C2108528 s_buffer_load_dword s34, s[4:7], 0x29 ; C2110529 s_buffer_load_dword s10, s[4:7], 0x2a ; C205052A s_buffer_load_dword s35, s[4:7], 0x2c ; C211852C s_buffer_load_dword s36, s[4:7], 0x34 ; C2120534 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v19, s0 ; 7E260200 s_buffer_load_dword s11, s[4:7], 0x35 ; C2058535 v_mov_b32_e32 v20, s1 ; 7E280201 s_buffer_load_dword s1, s[4:7], 0x36 ; C2008536 s_buffer_load_dword s37, s[4:7], 0x2d ; C212852D s_buffer_load_dword s38, s[4:7], 0x2e ; C213052E s_buffer_load_dword s2, s[4:7], 0x30 ; C2010530 s_buffer_load_dword s3, s[4:7], 0x31 ; C2018531 s_buffer_load_dword s0, s[4:7], 0x32 ; C2000532 s_buffer_load_dword s39, s[4:7], 0x0 ; C2138500 s_buffer_load_dword s40, s[4:7], 0x1 ; C2140501 s_buffer_load_dword s41, s[4:7], 0x2 ; C2148502 s_buffer_load_dword s42, s[4:7], 0x3 ; C2150503 s_buffer_load_dword s43, s[4:7], 0x4 ; C2158504 v_mov_b32_e32 v21, s36 ; 7E2A0224 v_mov_b32_e32 v22, s36 ; 7E2C0224 v_mov_b32_e32 v23, s36 ; 7E2E0224 s_buffer_load_dword s36, s[4:7], 0x5 ; C2120505 v_mov_b32_e32 v24, s31 ; 7E30021F s_buffer_load_dword s31, s[4:7], 0x6 ; C20F8506 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v25, s1 ; 7E320201 v_mov_b32_e32 v26, s1 ; 7E340201 v_mov_b32_e32 v27, s35 ; 7E360223 v_mul_f32_e32 v27, s11, v27 ; 1036360B v_mad_f32 v23, v23, s33, v27 ; D2820017 046C4317 v_mad_f32 v23, v26, s2, v23 ; D2820017 045C051A buffer_load_format_xyzw v[26:29], v0, s[24:27], 0 idxen ; E00C2000 80061A00 s_buffer_load_dword s24, s[4:7], 0x7 ; C20C0507 s_buffer_load_dword s25, s[4:7], 0x8 ; C20C8508 s_buffer_load_dword s26, s[4:7], 0x9 ; C20D0509 v_mul_f32_e32 v0, s20, v5 ; 10000A14 v_mad_f32 v9, v9, v1, -1.0 ; D2820009 03CE0309 exp 15, 32, 0, 0, 0, v15, v16, v17, v18 ; F800020F 1211100F s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; BF8C0000 v_mul_f32_e32 v15, 0, v26 ; 101E3480 exp 15, 33, 0, 0, 0, v26, v27, v15, v15 ; F800021F 0F0F1B1A v_mad_f32 v10, v10, v1, -1.0 ; D282000A 03CE030A v_mad_f32 v8, v8, v1, -1.0 ; D2820008 03CE0308 v_mad_f32 v11, v11, v1, -1.0 ; D282000B 03CE030B v_mad_f32 v12, v12, v1, -1.0 ; D282000C 03CE030C v_mad_f32 v13, v13, v1, -1.0 ; D282000D 03CE030D v_mad_f32 v1, v14, v1, -1.0 ; D2820001 03CE030E v_mad_f32 v0, s16, v4, v0 ; D2820000 04020810 v_mul_f32_e32 v14, s28, v5 ; 101C0A1C v_mad_f32 v14, s17, v4, v14 ; D282000E 043A0811 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v15, s12, v5 ; 101E0A0C v_mad_f32 v15, s18, v4, v15 ; D282000F 043E0812 v_mul_f32_e32 v5, s13, v5 ; 100A0A0D v_mad_f32 v4, s19, v4, v5 ; D2820004 04160813 v_mad_f32 v0, s14, v6, v0 ; D2820000 04020C0E v_mad_f32 v5, s15, v6, v14 ; D2820005 043A0C0F v_mad_f32 v14, s21, v6, v15 ; D282000E 043E0C15 v_mad_f32 v4, s22, v6, v4 ; D2820004 04120C16 v_mad_f32 v0, s23, v7, v0 ; D2820000 04020E17 v_mad_f32 v5, s29, v7, v5 ; D2820005 04160E1D v_mad_f32 v6, s30, v7, v14 ; D2820006 043A0E1E v_mad_f32 v4, s32, v7, v4 ; D2820004 04120E20 v_mad_f32 v7, -s9, v5, v20 ; D2820007 24520A09 v_mul_f32_e32 v14, s43, v5 ; 101C0A2B v_mul_f32_e32 v15, s36, v5 ; 101E0A24 v_mul_f32_e32 v16, s31, v5 ; 10200A1F v_mul_f32_e32 v5, s24, v5 ; 100A0A18 v_mad_f32 v14, s39, v0, v14 ; D282000E 043A0027 v_mad_f32 v15, s40, v0, v15 ; D282000F 043E0028 v_mad_f32 v16, s41, v0, v16 ; D2820010 04420029 v_mad_f32 v17, -s9, v0, v19 ; D2820011 244E0009 v_mad_f32 v0, s42, v0, v5 ; D2820000 0416002A v_mul_f32_e32 v5, s35, v7 ; 100A0E23 v_mad_f32 v5, s33, v17, v5 ; D2820005 04162221 v_mul_f32_e32 v18, s37, v7 ; 10240E25 v_mul_f32_e32 v7, s38, v7 ; 100E0E26 v_mad_f32 v18, s34, v17, v18 ; D2820012 044A2222 v_mad_f32 v7, s10, v17, v7 ; D2820007 041E220A v_mov_b32_e32 v17, s37 ; 7E220225 v_mul_f32_e32 v17, s11, v17 ; 1022220B v_mad_f32 v17, v21, s34, v17 ; D2820011 04444515 s_buffer_load_dword s12, s[4:7], 0xa ; C206050A s_buffer_load_dword s13, s[4:7], 0xb ; C206850B s_buffer_load_dword s14, s[4:7], 0xc ; C207050C s_buffer_load_dword s15, s[4:7], 0xd ; C207850D s_buffer_load_dword s4, s[4:7], 0xe ; C202050E v_mov_b32_e32 v19, s38 ; 7E260226 v_mul_f32_e32 v19, s11, v19 ; 1026260B v_mad_f32 v19, v22, s10, v19 ; D2820013 044C1516 v_mad_f32 v20, -s9, v6, v24 ; D2820014 24620C09 v_mad_f32 v14, s25, v6, v14 ; D282000E 043A0C19 v_mad_f32 v15, s26, v6, v15 ; D282000F 043E0C1A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v16, s12, v6, v16 ; D2820010 04420C0C v_mad_f32 v0, s13, v6, v0 ; D2820000 04020C0D v_mad_f32 v6, s14, v4, v14 ; D2820006 043A080E v_mad_f32 v14, s15, v4, v15 ; D282000E 043E080F v_mad_f32 v15, s4, v4, v16 ; D282000F 04420804 v_mad_f32 v0, s8, v4, v0 ; D2820000 04020808 v_mul_f32_e32 v4, v13, v9 ; 1008130D v_mad_f32 v4, v12, v10, -v4 ; D2820004 8412150C v_mul_f32_e32 v10, v11, v10 ; 1014150B v_mad_f32 v10, v13, v8, -v10 ; D282000A 842A110D v_mul_f32_e32 v8, v12, v8 ; 1010110C v_mad_f32 v8, v11, v9, -v8 ; D2820008 8422130B v_mad_f32 v9, v25, s3, v17 ; D2820009 04440719 v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mul_f32_e32 v10, v10, v1 ; 1014030A v_mul_f32_e32 v8, v8, v1 ; 10100308 v_mul_f32_e32 v16, v8, v12 ; 10201908 v_mad_f32 v16, v10, v13, -v16 ; D2820010 84421B0A v_mul_f32_e32 v17, v4, v13 ; 10221B04 v_mad_f32 v17, v8, v11, -v17 ; D2820011 84461708 v_mul_f32_e32 v16, v16, v1 ; 10200310 v_mul_f32_e32 v17, v17, v1 ; 10220311 v_mad_f32 v5, s2, v20, v5 ; D2820005 04162802 v_mad_f32 v18, s3, v20, v18 ; D2820012 044A2803 v_mul_f32_e32 v21, v5, v16 ; 102A2105 v_mul_f32_e32 v22, v5, v4 ; 102C0905 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mad_f32 v21, v17, v18, v21 ; D2820015 04562511 v_mad_f32 v22, v10, v18, v22 ; D2820016 045A250A v_mad_f32 v5, v12, v18, v5 ; D2820005 0416250C v_mul_f32_e32 v18, v10, v11 ; 1024170A v_mad_f32 v18, v4, v12, -v18 ; D2820012 844A1904 v_mul_f32_e32 v24, s2, v4 ; 10300802 v_mad_f32 v24, v10, s3, v24 ; D2820018 0460070A v_mul_f32_e32 v4, v23, v4 ; 10080917 v_mad_f32 v4, v10, v9, v4 ; D2820004 0412130A v_mul_f32_e32 v10, s2, v11 ; 10141602 v_mad_f32 v10, v12, s3, v10 ; D282000A 0428070C v_mul_f32_e32 v11, v23, v11 ; 10161717 v_mad_f32 v11, v12, v9, v11 ; D282000B 042E130C v_mul_f32_e32 v12, v23, v16 ; 10182117 v_mad_f32 v9, v17, v9, v12 ; D2820009 04321311 v_mul_f32_e32 v12, s2, v16 ; 10182002 v_mad_f32 v12, v17, s3, v12 ; D282000C 04300711 v_mul_f32_e32 v1, v18, v1 ; 10020312 v_mad_f32 v7, s0, v20, v7 ; D2820007 041E2800 v_mov_b32_e32 v16, s1 ; 7E200201 v_mad_f32 v16, v16, s0, v19 ; D2820010 044C0110 v_mad_f32 v17, v1, v7, v21 ; D2820011 04560F01 v_mad_f32 v18, v8, v7, v22 ; D2820012 045A0F08 v_mad_f32 v5, v13, v7, v5 ; D2820005 04160F0D v_mad_f32 v7, v8, s0, v24 ; D2820007 04600108 v_mad_f32 v4, v8, v16, v4 ; D2820004 04122108 v_mad_f32 v8, v1, v16, v9 ; D2820008 04262101 v_mad_f32 v9, v13, v16, v11 ; D2820009 042E210D exp 15, 34, 0, 0, 0, v8, v4, v9, v2 ; F800022F 02090408 exp 15, 35, 0, 0, 0, v6, v14, v15, v0 ; F800023F 000F0E06 exp 15, 36, 0, 0, 0, v17, v18, v5, v3 ; F800024F 03051211 v_mad_f32 v1, v1, s0, v12 ; D2820001 04300101 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v3, v13, s0, v10 ; D2820003 0428010D exp 15, 37, 0, 0, 0, v1, v7, v3, v2 ; F800025F 02030701 exp 15, 12, 0, 1, 0, v6, v14, v15, v0 ; F80008CF 000F0E06 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 32 Code Size: 1120 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[4], PERSPECTIVE DCL IN[1], TEXCOORD[5], PERSPECTIVE DCL IN[2], TEXCOORD[6], PERSPECTIVE DCL IN[3], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..9] DCL TEMP[0..1] DCL TEMP[2], LOCAL DCL TEMP[3..4] IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, -0.0000} IMM[1] FLT32 { 8.0000, 0.0398, 0.0000, 0.0039} IMM[2] FLT32 {340282346638528859811704183484516925440.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xyz, CONST[6], CONST[6].wwww 1: TEX TEMP[1], IN[3], SAMP[2], 2D 2: MUL TEMP[0].xyz, TEMP[0], TEMP[1] 3: MAD TEMP[0].xyz, TEMP[0], CONST[5].wwww, CONST[5] 4: TEX TEMP[1], IN[3], SAMP[3], 2D 5: MUL TEMP[0].w, TEMP[1].xxxx, CONST[7].xxxx 6: MAX TEMP[1].x, TEMP[0].wwww, CONST[7].yyyy 7: MIN TEMP[0].w, CONST[7].xxxx, TEMP[1].xxxx 8: TEX TEMP[1], IN[3], SAMP[0], 2D 9: MAD TEMP[1].xyz, TEMP[1], IMM[0].xxxx, IMM[0].yyyy 10: DP3 TEMP[2].x, TEMP[1], TEMP[1] 11: RSQ TEMP[2].x, TEMP[2].xxxx 12: MIN TEMP[2].x, IMM[2].xxxx, TEMP[2].xxxx 13: MUL TEMP[3].xyz, TEMP[1], TEMP[2].xxxx 14: DP3 TEMP[2].x, IN[2], IN[2] 15: RSQ TEMP[2].x, TEMP[2].xxxx 16: MIN TEMP[2].x, IMM[2].xxxx, TEMP[2].xxxx 17: MUL TEMP[1].xyz, IN[2], TEMP[2].xxxx 18: DP3 TEMP[1].w, TEMP[3], TEMP[1] 19: MUL TEMP[4].xyz, TEMP[1].wwww, TEMP[3] 20: MAD TEMP[1].xyz, TEMP[4], IMM[0].xxxx, -TEMP[1] 21: DP3 TEMP[2].x, IN[0], IN[0] 22: RSQ TEMP[2].x, TEMP[2].xxxx 23: MIN TEMP[2].x, IMM[2].xxxx, TEMP[2].xxxx 24: MUL TEMP[4].xyz, IN[0], TEMP[2].xxxx 25: DP3_SAT TEMP[1].x, TEMP[1], TEMP[4] 26: DP3_SAT TEMP[1].y, TEMP[3], TEMP[4] 27: POW TEMP[3].x, |TEMP[1].xxxx|, TEMP[0].wwww 28: ADD TEMP[0].w, TEMP[0].wwww, IMM[1].xxxx 29: MUL TEMP[0].w, TEMP[0].wwww, TEMP[3].xxxx 30: MUL TEMP[0].w, TEMP[0].wwww, IMM[1].yyyy 31: MUL TEMP[0].xyz, TEMP[0], TEMP[0].wwww 32: ADD TEMP[0].w, TEMP[1].xxxx, IMM[0].wwww 33: CMP TEMP[0].xyz, TEMP[0].wwww, IMM[1].zzzz, TEMP[0] 34: ADD TEMP[0].w, TEMP[1].yyyy, IMM[0].wwww 35: MOV TEMP[1].z, IMM[0].zzzz 36: ADD TEMP[1].xzw, TEMP[1].zzzz, -CONST[0].xyyz 37: TEX TEMP[3], IN[3], SAMP[1], 2D 38: MUL TEMP[1].xzw, TEMP[1], TEMP[3].xyyz 39: MAD TEMP[1].xzw, TEMP[1], CONST[4].wwww, CONST[4].xyyz 40: MUL TEMP[3].xyz, TEMP[1].yyyy, TEMP[1].xzww 41: CMP TEMP[3].xyz, TEMP[0].wwww, IMM[1].zzzz, TEMP[3] 42: ADD TEMP[0].xyz, TEMP[0], TEMP[3] 43: MOV TEMP[3].xyz, CONST[0] 44: MAD TEMP[0].xyz, TEMP[0], CONST[8], TEMP[3] 45: MAD OUT[0].xyz, TEMP[1].xzww, CONST[9], TEMP[0] 46: MUL OUT[0].w, IMM[1].wwww, IN[1].wwww 47: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0 %49 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %52 = bitcast <8 x i32> addrspace(2)* %51 to <32 x i8> addrspace(2)* %53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %55 = bitcast <4 x i32> addrspace(2)* %54 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %78 = fmul float %35, %38 %79 = fmul float %36, %38 %80 = fmul float %37, %38 %81 = bitcast float %76 to i32 %82 = bitcast float %77 to i32 %83 = insertelement <2 x i32> undef, i32 %81, i32 0 %84 = insertelement <2 x i32> %83, i32 %82, i32 1 %85 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %84, <32 x i8> %59, <16 x i8> %62, i32 2) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = fmul float %78, %86 %90 = fmul float %79, %87 %91 = fmul float %80, %88 %92 = fmul float %89, %34 %93 = fadd float %92, %31 %94 = fmul float %90, %34 %95 = fadd float %94, %32 %96 = fmul float %91, %34 %97 = fadd float %96, %33 %98 = bitcast float %76 to i32 %99 = bitcast float %77 to i32 %100 = insertelement <2 x i32> undef, i32 %98, i32 0 %101 = insertelement <2 x i32> %100, i32 %99, i32 1 %102 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %101, <32 x i8> %65, <16 x i8> %68, i32 2) %103 = extractelement <4 x float> %102, i32 0 %104 = fmul float %103, %39 %105 = call float @llvm.maxnum.f32(float %104, float %40) %106 = call float @llvm.minnum.f32(float %39, float %105) %107 = bitcast float %76 to i32 %108 = bitcast float %77 to i32 %109 = insertelement <2 x i32> undef, i32 %107, i32 0 %110 = insertelement <2 x i32> %109, i32 %108, i32 1 %111 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %110, <32 x i8> %48, <16 x i8> %50, i32 2) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = extractelement <4 x float> %111, i32 2 %115 = fmul float %112, 2.000000e+00 %116 = fadd float %115, -1.000000e+00 %117 = fmul float %113, 2.000000e+00 %118 = fadd float %117, -1.000000e+00 %119 = fmul float %114, 2.000000e+00 %120 = fadd float %119, -1.000000e+00 %121 = fmul float %116, %116 %122 = fmul float %118, %118 %123 = fadd float %122, %121 %124 = fmul float %120, %120 %125 = fadd float %123, %124 %126 = call float @llvm.AMDGPU.rsq.clamped.f32(float %125) %127 = call float @llvm.minnum.f32(float %126, float 0x47EFFFFFE0000000) %128 = fmul float %116, %127 %129 = fmul float %118, %127 %130 = fmul float %120, %127 %131 = fmul float %73, %73 %132 = fmul float %74, %74 %133 = fadd float %132, %131 %134 = fmul float %75, %75 %135 = fadd float %133, %134 %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135) %137 = call float @llvm.minnum.f32(float %136, float 0x47EFFFFFE0000000) %138 = fmul float %73, %137 %139 = fmul float %74, %137 %140 = fmul float %75, %137 %141 = fmul float %128, %138 %142 = fmul float %129, %139 %143 = fadd float %142, %141 %144 = fmul float %130, %140 %145 = fadd float %143, %144 %146 = fmul float %145, %128 %147 = fmul float %145, %129 %148 = fmul float %145, %130 %149 = fmul float %146, 2.000000e+00 %150 = fsub float %149, %138 %151 = fmul float %147, 2.000000e+00 %152 = fsub float %151, %139 %153 = fmul float %148, 2.000000e+00 %154 = fsub float %153, %140 %155 = fmul float %69, %69 %156 = fmul float %70, %70 %157 = fadd float %156, %155 %158 = fmul float %71, %71 %159 = fadd float %157, %158 %160 = call float @llvm.AMDGPU.rsq.clamped.f32(float %159) %161 = call float @llvm.minnum.f32(float %160, float 0x47EFFFFFE0000000) %162 = fmul float %69, %161 %163 = fmul float %70, %161 %164 = fmul float %71, %161 %165 = fmul float %150, %162 %166 = fmul float %152, %163 %167 = fadd float %166, %165 %168 = fmul float %154, %164 %169 = fadd float %167, %168 %170 = call float @llvm.AMDIL.clamp.(float %169, float 0.000000e+00, float 1.000000e+00) %171 = fmul float %128, %162 %172 = fmul float %129, %163 %173 = fadd float %172, %171 %174 = fmul float %130, %164 %175 = fadd float %173, %174 %176 = call float @llvm.AMDIL.clamp.(float %175, float 0.000000e+00, float 1.000000e+00) %177 = call float @fabs(float %170) %178 = call float @llvm.pow.f32(float %177, float %106) %179 = fadd float %106, 8.000000e+00 %180 = fmul float %179, %178 %181 = fmul float %180, 0x3FA45F3060000000 %182 = fmul float %93, %181 %183 = fmul float %95, %181 %184 = fmul float %97, %181 %185 = fadd float %170, 0xBEB0C6F7A0000000 %186 = call float @llvm.AMDGPU.cndlt(float %185, float 0.000000e+00, float %182) %187 = call float @llvm.AMDGPU.cndlt(float %185, float 0.000000e+00, float %183) %188 = call float @llvm.AMDGPU.cndlt(float %185, float 0.000000e+00, float %184) %189 = fadd float %176, 0xBEB0C6F7A0000000 %190 = fsub float 1.000000e+00, %24 %191 = fsub float 1.000000e+00, %25 %192 = fsub float 1.000000e+00, %26 %193 = bitcast float %76 to i32 %194 = bitcast float %77 to i32 %195 = insertelement <2 x i32> undef, i32 %193, i32 0 %196 = insertelement <2 x i32> %195, i32 %194, i32 1 %197 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %196, <32 x i8> %53, <16 x i8> %56, i32 2) %198 = extractelement <4 x float> %197, i32 0 %199 = extractelement <4 x float> %197, i32 1 %200 = extractelement <4 x float> %197, i32 2 %201 = fmul float %190, %198 %202 = fmul float %191, %199 %203 = fmul float %192, %200 %204 = fmul float %201, %30 %205 = fadd float %204, %27 %206 = fmul float %202, %30 %207 = fadd float %206, %28 %208 = fmul float %203, %30 %209 = fadd float %208, %29 %210 = fmul float %176, %205 %211 = fmul float %176, %207 %212 = fmul float %176, %209 %213 = call float @llvm.AMDGPU.cndlt(float %189, float 0.000000e+00, float %210) %214 = call float @llvm.AMDGPU.cndlt(float %189, float 0.000000e+00, float %211) %215 = call float @llvm.AMDGPU.cndlt(float %189, float 0.000000e+00, float %212) %216 = fadd float %186, %213 %217 = fadd float %187, %214 %218 = fadd float %188, %215 %219 = fmul float %216, %41 %220 = fadd float %219, %24 %221 = fmul float %217, %42 %222 = fadd float %221, %25 %223 = fmul float %218, %43 %224 = fadd float %223, %26 %225 = fmul float %205, %44 %226 = fadd float %225, %220 %227 = fmul float %207, %45 %228 = fadd float %227, %222 %229 = fmul float %209, %46 %230 = fadd float %229, %224 %231 = fmul float %72, 3.906250e-03 %232 = call i32 @llvm.SI.packf16(float %226, float %228) %233 = bitcast i32 %232 to float %234 = call i32 @llvm.SI.packf16(float %230, float %231) %235 = bitcast i32 %234 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %233, float %235, float %233, float %235) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 1, [m0] ; C8140700 v_interp_p2_f32 v5, [v5], v1, 3, 1, [m0] ; C8150701 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C s_load_dwordx8 s[32:39], s[6:7], 0x10 ; C0D00710 s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718 s_load_dwordx8 s[48:55], s[6:7], 0x0 ; C0D80700 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[32:39], s[24:27] ; F0800700 00C80B09 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[40:47], s[28:31] ; F0800100 00EA0009 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[48:55], s[20:23] ; F0800700 00AC0E09 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800700 00431109 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A s_buffer_load_dword s13, s[0:3], 0x1b ; C206811B s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x15 ; C2088115 s_buffer_load_dword s18, s[0:3], 0x16 ; C2090116 s_buffer_load_dword s19, s[0:3], 0x1c ; C209811C s_buffer_load_dword s20, s[0:3], 0x1d ; C20A011D s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 s_buffer_load_dword s23, s[0:3], 0x22 ; C20B8122 s_buffer_load_dword s24, s[0:3], 0x24 ; C20C0124 s_buffer_load_dword s25, s[0:3], 0x25 ; C20C8125 s_buffer_load_dword s2, s[0:3], 0x26 ; C2010126 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v1, s13 ; 7E02020D v_mul_f32_e32 v1, s10, v1 ; 1002020A v_mov_b32_e32 v9, s13 ; 7E12020D v_mul_f32_e32 v9, s11, v9 ; 1012120B v_mov_b32_e32 v10, s13 ; 7E14020D v_mul_f32_e32 v10, s12, v10 ; 1014140C v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mul_f32_e32 v9, v12, v9 ; 1012130C v_mul_f32_e32 v10, v13, v10 ; 1014150D v_sub_f32_e64 v11, 1.0, s4 ; D208000B 000008F2 v_mul_f32_e32 v11, v17, v11 ; 10161711 v_sub_f32_e64 v12, 1.0, s5 ; D208000C 00000AF2 v_mul_f32_e32 v12, v18, v12 ; 10181912 v_sub_f32_e64 v13, 1.0, s6 ; D208000D 00000CF2 v_mul_f32_e32 v13, v19, v13 ; 101A1B13 v_mov_b32_e32 v17, s16 ; 7E220210 v_mad_f32 v1, s9, v1, v17 ; D2820001 04460209 v_mov_b32_e32 v17, s17 ; 7E220211 v_mad_f32 v9, s9, v9, v17 ; D2820009 04461209 v_mov_b32_e32 v17, s18 ; 7E220212 v_mad_f32 v10, s9, v10, v17 ; D282000A 04461409 v_mad_f32 v14, 2.0, v14, -1.0 ; D282000E 03CE1CF4 v_mad_f32 v15, 2.0, v15, -1.0 ; D282000F 03CE1EF4 v_mad_f32 v16, 2.0, v16, -1.0 ; D2820010 03CE20F4 v_mov_b32_e32 v17, s7 ; 7E220207 v_mul_f32_e32 v18, v14, v14 ; 10241D0E v_mad_f32 v18, v15, v15, v18 ; D2820012 044A1F0F v_mad_f32 v18, v16, v16, v18 ; D2820012 044A2110 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mad_f32 v11, s15, v11, v17 ; D282000B 0446160F v_mov_b32_e32 v17, s8 ; 7E220208 v_mad_f32 v12, s15, v12, v17 ; D282000C 0446180F v_mov_b32_e32 v17, s14 ; 7E22020E v_mad_f32 v13, s15, v13, v17 ; D282000D 04461A0F v_min_f32_e32 v17, 0x7f7fffff, v18 ; 1E2224FF 7F7FFFFF v_mul_f32_e32 v18, v6, v6 ; 10240D06 v_mad_f32 v18, v7, v7, v18 ; D2820012 044A0F07 v_mad_f32 v18, v8, v8, v18 ; D2820012 044A1108 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mul_f32_e32 v14, v17, v14 ; 101C1D11 v_mul_f32_e32 v15, v17, v15 ; 101E1F11 v_mul_f32_e32 v16, v17, v16 ; 10202111 v_min_f32_e32 v17, 0x7f7fffff, v18 ; 1E2224FF 7F7FFFFF v_mul_f32_e32 v18, v17, v6 ; 10240D11 v_mul_f32_e32 v18, v18, v14 ; 10241D12 v_mul_f32_e32 v19, v17, v7 ; 10260F11 v_mad_f32 v18, v15, v19, v18 ; D2820012 044A270F v_mul_f32_e32 v19, v17, v8 ; 10261111 v_mad_f32 v18, v16, v19, v18 ; D2820012 044A2710 v_mul_f32_e32 v19, v14, v18 ; 1026250E v_mad_f32 v19, v18, v14, v19 ; D2820013 044E1D12 v_mad_f32 v6, -v6, v17, v19 ; D2820006 244E2306 v_mul_f32_e32 v19, v15, v18 ; 1026250F v_mad_f32 v19, v18, v15, v19 ; D2820013 044E1F12 v_mad_f32 v7, -v7, v17, v19 ; D2820007 244E2307 v_mul_f32_e32 v19, v2, v2 ; 10260502 v_mad_f32 v19, v3, v3, v19 ; D2820013 044E0703 v_mad_f32 v19, v4, v4, v19 ; D2820013 044E0904 v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mul_f32_e32 v20, v16, v18 ; 10282510 v_mad_f32 v18, v18, v16, v20 ; D2820012 04522112 v_mad_f32 v8, -v8, v17, v18 ; D2820008 244A2308 v_min_f32_e32 v17, 0x7f7fffff, v19 ; 1E2226FF 7F7FFFFF v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mul_f32_e32 v3, v17, v3 ; 10060711 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mad_f32 v6, v7, v3, v6 ; D2820006 041A0707 v_mul_f32_e32 v2, v2, v14 ; 10041D02 v_mad_f32 v2, v15, v3, v2 ; D2820002 040A070F v_mul_f32_e32 v3, v17, v4 ; 10060911 v_mad_f32 v4, v8, v3, v6 ; D2820004 041A0708 v_mad_f32 v2, v16, v3, v2 ; D2820002 040A0710 v_add_f32_e64 v3, 0, v4 clamp ; D2060803 00020880 v_and_b32_e32 v4, 0x7fffffff, v3 ; 360806FF 7FFFFFFF v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_f32_e32 v0, s19, v0 ; 10000013 v_max_f32_e32 v0, s20, v0 ; 20000014 v_min_f32_e32 v0, s19, v0 ; 1E000013 v_mul_legacy_f32_e32 v4, v0, v4 ; 0E080900 v_exp_f32_e32 v4, v4 ; 7E084B04 v_add_f32_e32 v0, 0x41000000, v0 ; 060000FF 41000000 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v0, 0x3d22f983, v0 ; 100000FF 3D22F983 v_mul_f32_e32 v1, v0, v1 ; 10020300 v_mov_b32_e32 v4, 0xb58637bd ; 7E0802FF B58637BD v_add_f32_e32 v3, v4, v3 ; 06060704 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v1, v1, 0, vcc ; D2000001 01A90101 v_add_f32_e32 v3, v4, v2 ; 06060504 v_mul_f32_e32 v4, v11, v2 ; 1008050B v_cmp_gt_f32_e64 s[0:1], 0, v3 ; D0080000 00020680 v_cndmask_b32_e64 v3, v4, 0, s[0:1] ; D2000003 00010104 v_add_f32_e32 v1, v3, v1 ; 06020303 v_mov_b32_e32 v3, s4 ; 7E060204 v_mad_f32 v1, s21, v1, v3 ; D2820001 040E0215 v_mul_f32_e32 v3, v0, v9 ; 10061300 v_cndmask_b32_e64 v3, v3, 0, vcc ; D2000003 01A90103 v_mul_f32_e32 v4, v12, v2 ; 1008050C v_cndmask_b32_e64 v4, v4, 0, s[0:1] ; D2000004 00010104 v_add_f32_e32 v3, v4, v3 ; 06060704 v_mov_b32_e32 v4, s5 ; 7E080205 v_mad_f32 v3, s22, v3, v4 ; D2820003 04120616 v_mov_b32_e32 v4, s6 ; 7E080206 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_cndmask_b32_e64 v0, v0, 0, vcc ; D2000000 01A90100 v_mul_f32_e32 v2, v13, v2 ; 1004050D v_cndmask_b32_e64 v2, v2, 0, s[0:1] ; D2000002 00010102 v_add_f32_e32 v0, v2, v0 ; 06000102 v_mad_f32 v0, s23, v0, v4 ; D2820000 04120017 v_mad_f32 v1, v11, s24, v1 ; D2820001 0404310B v_mad_f32 v2, v12, s25, v3 ; D2820002 040C330C v_mad_f32 v0, v13, s2, v0 ; D2820000 0400050D v_mul_f32_e32 v3, 0x3b800000, v5 ; 10060AFF 3B800000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 24 Code Size: 936 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COORD_ORIGIN UPPER_LEFT PROPERTY FS_COORD_PIXEL_CENTER INTEGER DCL IN[0], TEXCOORD[0], PERSPECTIVE DCL IN[1], TEXCOORD[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[0..12] DCL TEMP[0..1] DCL TEMP[2], LOCAL DCL TEMP[3] IMM[0] FLT32 { -0.5000, 0.0000, 0.5000, 0.2000} 0: ADD TEMP[0].xy, IMM[0].xxxx, IN[0] 1: MOV TEMP[1].y, IMM[0].yyyy 2: DP2 TEMP[2].x, CONST[7], TEMP[0] 3: ADD TEMP[3].x, TEMP[1].yyyy, TEMP[2].xxxx 4: DP2 TEMP[2].x, CONST[8], TEMP[0] 5: ADD TEMP[3].y, TEMP[1].yyyy, TEMP[2].xxxx 6: ADD TEMP[0].xy, TEMP[3], CONST[9] 7: ADD TEMP[0].xy, TEMP[0], IMM[0].zzzz 8: TEX TEMP[0], TEMP[0], SAMP[0], 2D 9: DP2 TEMP[2].x, CONST[10], IN[0] 10: ADD TEMP[3].x, TEMP[1].yyyy, TEMP[2].xxxx 11: DP2 TEMP[2].x, CONST[11], IN[0] 12: ADD TEMP[3].y, TEMP[1].yyyy, TEMP[2].xxxx 13: ADD TEMP[0].yz, TEMP[3].xxyw, CONST[12].xxyw 14: TEX TEMP[1], TEMP[0].yzzw, SAMP[1], 2D 15: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx 16: MAD TEMP[0].xy, TEMP[0].xxxx, IMM[0].wwww, IN[0] 17: TEX TEMP[0], TEMP[0], SAMP[2], 2D 18: MUL TEMP[0], TEMP[0].xyzx, CONST[6] 19: MAD OUT[0].xyz, IN[1], TEMP[0], CONST[0] 20: MUL OUT[0].w, TEMP[0].wwww, IN[1].wwww 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %43 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %48 = bitcast <8 x i32> addrspace(2)* %47 to <32 x i8> addrspace(2)* %49 = load <32 x i8>, <32 x i8> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %51 = bitcast <4 x i32> addrspace(2)* %50 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %65 = fadd float %59, -5.000000e-01 %66 = fadd float %60, -5.000000e-01 %67 = fmul float %31, %65 %68 = fmul float %32, %66 %69 = fadd float %67, %68 %70 = fadd float %69, 0.000000e+00 %71 = fmul float %33, %65 %72 = fmul float %34, %66 %73 = fadd float %71, %72 %74 = fadd float %73, 0.000000e+00 %75 = fadd float %70, %35 %76 = fadd float %74, %36 %77 = fadd float %75, 5.000000e-01 %78 = fadd float %76, 5.000000e-01 %79 = bitcast float %77 to i32 %80 = bitcast float %78 to i32 %81 = insertelement <2 x i32> undef, i32 %79, i32 0 %82 = insertelement <2 x i32> %81, i32 %80, i32 1 %83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %82, <32 x i8> %44, <16 x i8> %46, i32 2) %84 = extractelement <4 x float> %83, i32 0 %85 = fmul float %37, %59 %86 = fmul float %38, %60 %87 = fadd float %85, %86 %88 = fadd float %87, 0.000000e+00 %89 = fmul float %39, %59 %90 = fmul float %40, %60 %91 = fadd float %89, %90 %92 = fadd float %91, 0.000000e+00 %93 = fadd float %88, %41 %94 = fadd float %92, %42 %95 = bitcast float %93 to i32 %96 = bitcast float %94 to i32 %97 = insertelement <2 x i32> undef, i32 %95, i32 0 %98 = insertelement <2 x i32> %97, i32 %96, i32 1 %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %98, <32 x i8> %49, <16 x i8> %52, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = fmul float %84, %100 %102 = fmul float %101, 0x3FC99999A0000000 %103 = fadd float %102, %59 %104 = fmul float %101, 0x3FC99999A0000000 %105 = fadd float %104, %60 %106 = bitcast float %103 to i32 %107 = bitcast float %105 to i32 %108 = insertelement <2 x i32> undef, i32 %106, i32 0 %109 = insertelement <2 x i32> %108, i32 %107, i32 1 %110 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %109, <32 x i8> %55, <16 x i8> %58, i32 2) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = fmul float %111, %27 %115 = fmul float %112, %28 %116 = fmul float %113, %29 %117 = fmul float %111, %30 %118 = fmul float %61, %114 %119 = fadd float %118, %24 %120 = fmul float %62, %115 %121 = fadd float %120, %25 %122 = fmul float %63, %116 %123 = fadd float %122, %26 %124 = fmul float %117, %64 %125 = call i32 @llvm.SI.packf16(float %119, float %121) %126 = bitcast i32 %125 to float %127 = call i32 @llvm.SI.packf16(float %123, float %124) %128 = bitcast i32 %127 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %126, float %128, float %126, float %128) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1d ; C204011D v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 s_buffer_load_dword s9, s[0:3], 0x1c ; C204811C v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121 s_buffer_load_dword s11, s[0:3], 0x20 ; C2058120 s_buffer_load_dword s12, s[0:3], 0x24 ; C2060124 s_buffer_load_dword s13, s[0:3], 0x25 ; C2068125 v_add_f32_e32 v1, -0.5, v3 ; 060206F1 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, s8, v1, 0 ; D2820007 02020208 v_add_f32_e32 v8, -0.5, v2 ; 061004F1 v_mad_f32 v7, s9, v8, v7 ; D2820007 041E1009 s_buffer_load_dword s8, s[0:3], 0x28 ; C2040128 s_buffer_load_dword s9, s[0:3], 0x29 ; C2048129 v_mad_f32 v1, s10, v1, 0 ; D2820001 0202020A v_mad_f32 v1, s11, v8, v1 ; D2820001 0406100B v_add_f32_e32 v7, s12, v7 ; 060E0E0C v_add_f32_e32 v1, s13, v1 ; 0602020D v_add_f32_e32 v7, 0.5, v7 ; 060E0EF0 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_add_f32_e32 v8, 0.5, v1 ; 061002F0 s_buffer_load_dword s10, s[0:3], 0x2c ; C205012C s_buffer_load_dword s11, s[0:3], 0x2d ; C205812D s_buffer_load_dword s24, s[0:3], 0x30 ; C20C0130 s_buffer_load_dword s25, s[0:3], 0x31 ; C20C8131 s_load_dwordx4 s[28:31], s[4:5], 0x4 ; C08E0504 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx4 s[4:7], s[4:5], 0x8 ; C0820508 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[16:23], s[12:15] ; F0800100 00640107 v_mad_f32 v7, s9, v3, 0 ; D2820007 02020609 v_mad_f32 v7, s8, v2, v7 ; D2820007 041E0408 v_mad_f32 v8, s11, v3, 0 ; D2820008 0202060B v_mad_f32 v8, s10, v2, v8 ; D2820008 0422040A v_add_f32_e32 v9, s24, v7 ; 06120E18 v_add_f32_e32 v10, s25, v8 ; 06141019 image_sample v7, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[32:39], s[28:31] ; F0800100 00E80709 s_buffer_load_dword s8, s[0:3], 0x1a ; C204011A s_buffer_load_dword s9, s[0:3], 0x1b ; C204811B s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mov_b32_e32 v7, 0x3e4ccccd ; 7E0E02FF 3E4CCCCD v_mad_f32 v8, v1, v7, v2 ; D2820008 040A0F01 v_mad_f32 v9, v1, v7, v3 ; D2820009 040E0F01 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[40:47], s[4:7] ; F0800700 002A0108 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v7, s10, v1 ; 100E020A v_mul_f32_e32 v2, s11, v2 ; 1004040B v_mul_f32_e32 v3, s8, v3 ; 10060608 v_mul_f32_e32 v1, s9, v1 ; 10020209 v_mad_f32 v4, v4, v7, s12 ; D2820004 00320F04 v_mad_f32 v2, v5, v2, s13 ; D2820002 00360505 v_mad_f32 v3, v6, v3, s0 ; D2820003 00020706 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_cvt_pkrtz_f16_f32_e32 v1, v4, v2 ; 5E020504 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 12 Code Size: 392 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** timeout on cs lockup likely happen at cs 0x00000000 dw 0x00000000 timeout on cs lockup likely happen at cs 0x00000000 dw 0x00000000 timeout on cs lockup likely happen at cs 0x00000000 dw 0x00000000 timeout on cs lockup likely happen at cs 0x00000000 dw 0x00000000 timeout on cs lockup likely happen at cs 0x00000000 dw 0x00000000